9 Multivariate methods for heterogeneous data

## -----------------------------------------------------------------------------
library("pheatmap")
dir(system.file("extdata", package = "MSMB"))
##  [1] "01126211_NB8_I025.fcs"  "01247181_NB06_I025.fcs" "02276161_NB8_I025.fcs" 
##  [4] "03157141_NB06_I025.fcs" "05107251_NB06_I025.fcs" "06226101_NB8_I025.fcs" 
##  [7] "07115141_NB8_I025.fcs"  "07215281_NB8_I025.fcs"  "08045071_NB8_I025.fcs" 
## [10] "09225121_NB8_I025.fcs"  "10175181_NB8_I025.fcs"  "10276181_NB8_I025.fcs" 
## [13] "11145351_NB8_I025.fcs"  "12015151_NB8_I025.fcs"  "annotation.txt"        
## [16] "PaintedTurtles.txt"     "statecancerdeaths.csv"
dir(system.file("images", package = "MSMB"))
## [1] "hiv.png"        "image-Cy3.tif"  "image-DAPI.tif" "image-FITC.tif"
## [5] "mosquito.png"
dir(system.file("data", package = "MSMB"))
## [1] "brcalymphnode.RData"  "datalist"             "e100.RData"          
## [4] "ukraine_coords.RData" "ukraine_dists.RData"
dir(system.file("scripts", package = "MSMB"))
## character(0)
data("ukraine_dists", package = "MSMB")
as.matrix(ukraine_dists)[1:4, 1:4]
##                Kyiv    Odesa Sevastopol Chernihiv
## Kyiv         0.0000 441.2548   687.7551  128.1287
## Odesa      441.2548   0.0000   301.7482  558.6483
## Sevastopol 687.7551 301.7482     0.0000  783.6561
## Chernihiv  128.1287 558.6483   783.6561    0.0000
## -----------------------------------------------------------------------------
pheatmap(as.matrix(ukraine_dists), 
  color = colorRampPalette(c("#0057b7", "#ffd700"))(50),
  breaks = seq(0, max(ukraine_dists)^(1/2), length.out = 51)^2,
  treeheight_row = 10, treeheight_col = 10)

## -----------------------------------------------------------------------------
pheatmap(as.matrix(ukraine_dists), 
  color = colorRampPalette(c("blue","white", "red"))(50),
  breaks = seq(0, max(ukraine_dists)^(1/2), length.out = 51)^2,
  treeheight_row = 10, treeheight_col = 10)

## -----------------------------------------------------------------------------
ukraine_mds = cmdscale(ukraine_dists, eig = TRUE)
ukraine_mds
## $points
##                        [,1]        [,2]
## Kyiv             131.153351  169.874539
## Odesa             16.624277 -256.292319
## Sevastopol      -245.755229 -405.370111
## Chernihiv        105.112948  295.355793
## Hostomel         152.902706  180.684913
## Kharkiv         -275.252458  226.879572
## Kherson         -120.217309 -204.620962
## Mariupol        -466.241384  -50.420968
## Volnovakha      -445.205338    1.530718
## Cherkasy          -2.118748   86.947951
## Chernivtsi       410.954903 -131.132200
## Dnipro          -241.445579   39.535552
## Donetsk         -452.074679   52.713423
## Kramatorsk      -411.493140  124.038010
## Ivano-Frankivsk  511.480496  -75.501010
## Izium           -374.498185  164.904503
## Khmelnytskyi     357.335158    6.288698
## Kropyvnytskyi    -42.284972  -10.063808
## Luhansk         -536.915774  147.268066
## Lutsk            499.069258  131.831274
## Lviv             575.022730   18.679743
## Melitopol       -317.100310 -126.114795
## Mykolaiv         -64.292846 -180.495908
## Poltava         -172.173947  149.369540
## Rivne            431.862640  128.771697
## Sumy            -148.564160  295.679652
## Ternopil         458.969320    4.137619
## Uzhhorod         682.405840 -132.409076
## Vinnytsia        247.137629    6.186894
## Zaporizhzhia    -266.264252  -24.722347
## Zhytomyr         255.669234  121.156771
## Simferopol      -279.879385 -356.015502
## ostriv Zmiinyi    26.077203 -398.675921
## 
## $eig
##  [1]  3.913916e+06  1.084943e+06  3.419437e+02  4.842412e-01  2.129017e-01
##  [6]  3.828931e-05  5.895169e-06  5.827564e-07  8.790838e-08  4.945342e-08
## [11]  7.060156e-10  3.426975e-10  2.363593e-10  5.733835e-11  4.074591e-11
## [16]  3.187820e-11 -1.794717e-11 -4.725759e-11 -6.066847e-11 -1.401687e-10
## [21] -1.462420e-10 -1.575528e-10 -3.028042e-10 -4.332777e-10 -4.804013e-10
## [26] -2.394962e-09 -1.512801e-08 -9.604281e-05 -2.505885e-04 -1.409065e-02
## [31] -1.190225e-01 -3.584671e+02 -8.851127e+02
## 
## $x
## NULL
## 
## $ac
## [1] 0
## 
## $GOF
## [1] 0.9996828 0.9999315
seq(along = ukraine_mds$eig)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33
## -----------------------------------------------------------------------------
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
plotscree = function(x, m = length(x$eig)) {
  ggplot(tibble(eig = x$eig[seq_len(m)], k = seq(along = eig)),
    aes(x = k, y = eig)) + theme_minimal() +
    scale_x_discrete("k", limits = as.factor(seq_len(m))) + 
    geom_bar(stat = "identity", width = 0.5, fill = "#ffd700", col = "#0057b7")
}

## -----------------------------------------------------------------------------
plotscree(ukraine_mds, m = 4)

## -----------------------------------------------------------------------------
ukraine_mds$eig |> signif(3)
##  [1]  3.91e+06  1.08e+06  3.42e+02  4.84e-01  2.13e-01  3.83e-05  5.90e-06
##  [8]  5.83e-07  8.79e-08  4.95e-08  7.06e-10  3.43e-10  2.36e-10  5.73e-11
## [15]  4.07e-11  3.19e-11 -1.79e-11 -4.73e-11 -6.07e-11 -1.40e-10 -1.46e-10
## [22] -1.58e-10 -3.03e-10 -4.33e-10 -4.80e-10 -2.39e-09 -1.51e-08 -9.60e-05
## [29] -2.51e-04 -1.41e-02 -1.19e-01 -3.58e+02 -8.85e+02
plotscree(ukraine_mds)

## -----------------------------------------------------------------------------
stopifnot(any(ukraine_mds$eig < 0))

## -----------------------------------------------------------------------------
ukraine_mds_df = tibble(
  PCo1 = ukraine_mds$points[, 1],
  PCo2 = ukraine_mds$points[, 2],
  labs = rownames(ukraine_mds$points)
)
if (with(ukraine_mds_df, labs[which.max(PCo1)] != "Luhansk"))
  ukraine_mds_df$PCo1 = -ukraine_mds_df$PCo1
if (with(ukraine_mds_df, labs[which.max(PCo2)] != "Sevastopol"))
  ukraine_mds_df$PCo2 = -ukraine_mds_df$PCo2
if(with(ukraine_mds_df,
     labs[which.max(PCo1)] != "Luhansk" ||
     labs[which.max(PCo2)] != "Sevastopol"))
  stop("There is an error with 'ukraine_mds_df'.")
library("ggrepel")
g = ggplot(ukraine_mds_df, aes(x = PCo1, y = PCo2, label = labs)) +
  geom_point() + geom_text_repel(col = "#0057b7") + coord_fixed() 
g

## -----------------------------------------------------------------------------
g %+% mutate(ukraine_mds_df, PCo1 = PCo1, PCo2 = -PCo2)

## -----------------------------------------------------------------------------
data("ukraine_coords", package = "MSMB")
print.data.frame(ukraine_coords[1:4,  c("city", "lat", "lon")])
##         city      lat      lon
## 1       Kyiv 50.45003 30.52414
## 2      Odesa 46.48430 30.73229
## 3 Sevastopol 44.60544 33.52208
## 4  Chernihiv 51.49410 31.29433
ggplot(ukraine_coords, aes(x = lon, y = lat, label = city)) +
  geom_point() + geom_text_repel(col = "#0057b7")

## -----------------------------------------------------------------------------
earthradius = 6371

## -----------------------------------------------------------------------------
## library("sf")
## library("rnaturalearth")
## library("rnaturalearthdata")
## world = ne_countries(scale = "medium", returnclass = "sf")
## ggplot() +
##   geom_sf(data = world,
##           fill = ifelse(world$geounit == "Ukraine", "#ffd700", "#f0f0f0")) +
##   coord_sf(xlim = range(ukraine_coords$lon) + c(-0.5, + 2),
##            ylim = range(ukraine_coords$lat) + c(-0.5, + 1)) +
##   geom_point(aes(x = lon, y = lat), data = ukraine_coords) +
##   geom_text_repel(aes(x = lon, y = lat, label = city),
##                   color = "#0057b7", data = ukraine_coords)
## -----------------------------------------------------------------------------
library("sf")
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library("rnaturalearth")
library("rnaturalearthdata")
## 
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
world = ne_countries(scale = "medium", returnclass = "sf")
ggplot() + 
  geom_sf(data = world, 
          fill = ifelse(world$geounit == "Ukraine", "#ffd700", "#f0f0f0")) +
  coord_sf(xlim = range(ukraine_coords$lon) + c(-0.5, + 2), 
           ylim = range(ukraine_coords$lat) + c(-0.5, + 1)) +
  geom_point(aes(x = lon, y = lat), data = ukraine_coords) + 
  geom_text_repel(aes(x = lon, y = lat, label = city), 
                  color = "#0057b7", data = ukraine_coords)

## -----------------------------------------------------------------------------
stopifnot(48 == round(mean(range(ukraine_coords$lat))))

## -----------------------------------------------------------------------------
X = with(ukraine_coords, cbind(lon, lat * cos(48)))
DdotD = as.matrix(dist(X)^2)
DdotD
##            1          2           3         4         5          6           7
## 1   0.000000  6.4880224  22.9856271  1.039897  0.078079  32.654314  10.3616001
## 2   6.488022  0.0000000   9.2295496 10.600698  7.125703  35.278880   3.5954616
## 3  22.985627  9.2295496   0.0000000 24.408633 25.315006  19.229610   2.5017166
## 4   1.039897 10.6006978  24.4086328  0.000000  1.407973  25.295041  11.4232240
## 5   0.078079  7.1257035  25.3150061  1.407973  0.000000  35.809417  11.9852323
## 6  32.654314 35.2788795  19.2296102 25.295041 35.809417   0.000000  17.5993187
## 7  10.361600  3.5954616   2.5017166 11.423224 11.985232  17.599319   0.0000000
## 8  53.972768 46.6338958  18.7651629 47.060335 58.155175   5.177725  24.3320810
## 9  51.992827 46.3085695  19.4934651 44.727463 56.091725   3.958303  24.1318623
## 10  2.769228  5.3511335  11.7381155  2.305343  3.774690  17.530386   3.5423098
## 11 22.954031 24.3194237  63.0761602 32.910235 20.845953 107.145729  45.8403927
## 12 22.018812 20.1842085   8.4232381 17.795757 24.716189   2.366426   7.2044150
## 13 55.385733 50.9327475  23.0782617 47.298755 59.597158   4.066677  27.5605946
## 14 51.046844 49.0342830  23.5037603 42.675351 55.061346   2.475354  26.3907422
## 15 34.756619 38.7002472  85.2844384 46.059139 31.925884 133.195415  64.7873242
## 16 46.269443 45.8547444  22.7280802 37.982124 50.070500   1.359925  24.3117095
## 17 13.000373 17.6151138  52.3043613 20.382276 11.316134  85.727149  35.0452806
## 18  4.574192  4.0336288   7.8279385  4.591068  5.795135  16.623990   1.5616478
## 19 78.414437 75.1452366  39.7997243 67.547454 83.355898  10.229200  46.0363062
## 20 27.117892 36.7313172  82.7197655 35.921616 24.403916 119.280738  60.2747731
## 21 42.304651 49.5191592 101.3061578 53.865935 39.009970 148.835175  78.0583059
## 22 28.926488 21.6804208   5.5204884 25.565543 31.988274   4.774306   7.6179846
## 23  7.106451  1.6908499   4.6376757  8.855028  8.357252  21.681212   0.4450834
## 24 16.517262 18.5328607  11.2386252 12.090665 18.827301   2.889551   7.2681010
## 25 18.268772 27.0867643  67.6860972 25.745375 16.062651  99.755626  47.1197473
## 26 18.394173 24.6023909  17.9382747 12.448015 20.687991   2.386461  12.2132951
## 27 24.654798 30.2896090  72.9301265 34.057498 22.219637 113.269151  52.9567180
## 28 68.968119 72.9386894 132.4966991 84.236831 64.894893 194.779354 108.1837383
## 29  4.835741  8.2209595  34.3155350 10.085170  3.961850  60.501664  20.0379322
## 30 23.874762 20.0021682   6.8638173 20.062002 26.682623   3.117502   6.8120113
## 31  3.455485 10.0975102  36.6518241  7.515411  2.571843  57.209844  21.0200913
## 32 25.191056 12.3202056   0.3861232 25.423348 27.788825  14.940535   3.3497843
## 33 11.163179  0.8992753  11.1870745 17.142752 11.659737  45.530470   6.6561563
##              8           9         10          11          12          13
## 1   53.9727683  51.9928268  2.7692280  22.9540306  22.0188120  55.3857335
## 2   46.6338958  46.3085695  5.3511335  24.3194237  20.1842085  50.9327475
## 3   18.7651629  19.4934651 11.7381155  63.0761602   8.4232381  23.0782617
## 4   47.0603353  44.7274629  2.3053433  32.9102352  17.7957569  47.2987549
## 5   58.1551751  56.0917253  3.7746903  20.8459526  24.7161886  59.5971583
## 6    5.1777254   3.9583027 17.5303861 107.1457286   2.3664261   4.0666770
## 7   24.3320810  24.1318623  3.5423098  45.8403927   7.2044150  27.5605946
## 8    0.0000000   0.1060518 32.4142324 135.4267024   7.0626837   0.4101164
## 9    0.1060518   0.0000000 31.0022246 133.8784130   6.3520337   0.1623058
## 10  32.4142324  31.0022246  0.0000000  38.0180085   9.2891976  33.8136937
## 11 135.4267024 133.8784130 38.0180085   0.0000000  82.8984697 140.7770862
## 12   7.0626837   6.3520337  9.2891976  82.8984697   0.0000000   7.6989994
## 13   0.4101164   0.1623058 33.8136937 140.7770862   7.6989994   0.0000000
## 14   1.1076157   0.5401810 30.7364262 135.7301685   6.4949432   0.2613170
## 15 166.2239096 164.2905812 54.1116632   1.6721332 106.8235561 171.7117011
## 16   1.8733377   1.0888513 27.2708746 128.9483727   5.2169786   0.8396915
## 17 113.9502215 112.0395063 25.8005757   1.6113875  65.3732534 117.9223526
## 18  28.7444513  27.7380380  0.4004237  40.0638505   7.7077111  30.7443997
## 19   3.9459223   3.6190559 52.7087524 178.5139119  18.1140620   2.3645432
## 20 155.0273544 152.4043049 46.1029508   2.8584371  96.6360353 158.8342094
## 21 185.8367374 183.4566102 64.5003925   4.6245529 121.9975855 190.9724268
## 22   4.7223181   4.7137610 13.8146448  90.0588669   1.1934114   6.4098252
## 23  30.8749774  30.4735925  2.5020857  37.3828173  10.2015158  34.1688289
## 24  11.5438346  10.3218652  6.2187455  74.8822351   0.7566720  11.5811090
## 25 132.7479104 130.2698752 34.2922323   2.3290794  79.1691345 136.1811840
## 26  13.5149439  11.7735098  8.4116112  81.4151014   2.5047313  12.4284445
## 27 145.4754513 143.3695970 41.8257719   0.7797699  89.7851853 150.0424988
## 28 233.4475309 231.3965135 95.4669178  13.2623426 162.3049849 240.3723337
## 29  84.3525712  82.6683411 12.9124360   6.7688995  43.4539812  87.7177864
## 30   6.1466459   5.6979458 10.4017779  84.3618171   0.1619747   7.2099473
## 31  82.9698040  80.8823184 11.7612388   9.0572990  41.9248674  85.4587660
## 32  13.7680957  14.4117366 12.4477618  71.2203632   5.9477644  17.5279469
## 33  55.3621479  55.4903188 10.6369416  21.9621310  27.6416578  60.8541043
##             14          15          16         17         18         19
## 1   51.0468436  34.7566185  46.2694425  13.000373  4.5741916  78.414437
## 2   49.0342830  38.7002472  45.8547444  17.615114  4.0336288  75.145237
## 3   23.5037603  85.2844384  22.7280802  52.304361  7.8279385  39.799724
## 4   42.6753513  46.0591391  37.9821242  20.382276  4.5910676  67.547454
## 5   55.0613465  31.9258842  50.0704997  11.316134  5.7951352  83.355898
## 6    2.4753537 133.1954152   1.3599252  85.727149 16.6239900  10.229200
## 7   26.3907422  64.7873242  24.3117095  35.045281  1.5616478  46.036306
## 8    1.1076157 166.2239096   1.8733377 113.950222 28.7444513   3.945922
## 9    0.5401810 164.2905812   1.0888513 112.039506 27.7380380   3.619056
## 10  30.7364262  54.1116632  27.2708746  25.800576  0.4004237  52.708752
## 11 135.7301685   1.6721332 128.9483727   1.611388 40.0638505 178.513912
## 12   6.4949432 106.8235561   5.2169786  65.373253  7.7077111  18.114062
## 13   0.2613170 171.7117011   0.8396915 117.922353 30.7443997   2.364543
## 14   0.0000000 165.7552932   0.1774971 112.655939 28.3105022   2.945604
## 15 165.7552932   0.0000000 157.9865986   5.249904 57.1522405 212.830899
## 16   0.1774971 157.9865986   0.0000000 106.091437 25.3179313   4.233319
## 17 112.6559395   5.2499042 106.0914373   0.000000 28.2830705 152.026178
## 18  28.3105022  57.1522405  25.3179313  28.283070  0.0000000  49.446153
## 19   2.9456036 212.8308993   4.2333189 152.026178 49.4461533   0.000000
## 20 152.0623384   1.7329850 143.9909793   3.473179 50.2867072 197.298787
## 21 184.1766497   0.8070818 175.6516938   8.762533 68.5257162 233.703500
## 22   6.3145013 115.6660533   5.8463546  73.328999 10.8507632  16.543333
## 23  32.5265313  54.6044000  29.9367918  27.593343  1.0389917  54.382512
## 24   9.4992741  97.0173969   7.5049307  57.338190  5.6992173  22.954155
## 25 129.8877406   3.5549081 122.4327600   1.120143 37.9946843 171.916694
## 26   9.6723868 103.4795178   7.3419552  62.118097  8.8002086  22.445250
## 27 144.0933542   0.9414863 136.6293157   1.932732 44.9864952 188.235620
## 28 233.5488913   5.8356794 224.4179084  22.135947 99.2738910 288.833062
## 29  83.2084900  14.1592319  77.6244859   2.230343 14.6354511 117.453280
## 30   6.4048687 108.7964824   5.4025969  67.250419  8.3160511  17.677257
## 31  80.4277143  16.4058690  74.5857569   3.144885 14.1879378 114.123887
## 32  17.9998359  94.6726029  17.4507685  58.917339  8.5629275  32.354939
## 33  59.4547080  35.6854054  56.4073614  17.501774  8.5969121  87.209365
##             20          21          22         23          24          25
## 1   27.1178918  42.3046509  28.9264882  7.1064514  16.5172625  18.2687722
## 2   36.7313172  49.5191592  21.6804208  1.6908499  18.5328607  27.0867643
## 3   82.7197655 101.3061578   5.5204884  4.6376757  11.2386252  67.6860972
## 4   35.9216163  53.8659352  25.5655426  8.8550276  12.0906650  25.7453751
## 5   24.4039163  39.0099702  31.9882737  8.3572517  18.8273009  16.0626514
## 6  119.2807381 148.8351746   4.7743059 21.6812117   2.8895513  99.7556263
## 7   60.2747731  78.0583059   7.6179846  0.4450834   7.2681010  47.1197473
## 8  155.0273544 185.8367374   4.7223181 30.8749774  11.5438346 132.7479104
## 9  152.4043049 183.4566102   4.7137610 30.4735925  10.3218652 130.2698752
## 10  46.1029508  64.5003925  13.8146448  2.5020857   6.2187455  34.2922323
## 11   2.8584371   4.6245529  90.0588669 37.3828173  74.8822351   2.3290794
## 12  96.6360353 121.9975855   1.1934114 10.2015158   0.7566720  79.1691345
## 13 158.8342094 190.9724268   6.4098252 34.1688289  11.5811090 136.1811840
## 14 152.0623384 184.1766497   6.3145013 32.5265313   9.4992741 129.8877406
## 15   1.7329850   0.8070818 115.6660533 54.6044000  97.0173969   3.5549081
## 16 143.9909793 175.6516938   5.8463546 29.9367918   7.5049307 122.4327600
## 17   3.4731794   8.7625333  73.3289991 27.5933431  57.3381897   1.1201430
## 18  50.2867072  68.5257162  10.8507632  1.0389917   5.6992173  37.9946843
## 19 197.2987871 233.7035003  16.5433329 54.3825123  22.9541549 171.9166940
## 20   0.0000000   1.9944279 107.4844698 50.3625840  85.7531091   0.8736548
## 21   1.9944279   0.0000000 132.5246212 66.7655744 110.6796917   5.1749993
## 22 107.4844698 132.5246212   0.0000000 11.4905380   3.7753888  89.2158432
## 23  50.3625840  66.7655744  11.4905380  0.0000000   9.3371728  38.4187253
## 24  85.7531091 110.6796917   3.7753888  9.3371728   0.0000000  69.3159754
## 25   0.8736548   5.1749993  89.2158432 38.4187253  69.3159754   0.0000000
## 26  89.9329066 116.4875079   7.1085601 14.2381887   0.7799218  73.1624224
## 27   0.6534950   2.4680781  98.8679685 43.7141373  80.2625198   0.8986299
## 28  10.9536753   3.6001028 172.3907456 95.0401638 150.4101585  17.2296970
## 29  10.8473929  19.8339421  50.1453244 14.5185172  37.0531360   5.7025898
## 30  99.4376196 124.5394882   0.4830476 10.0750633   1.5612237  81.7647462
## 31  11.3133377  21.5792844  49.8446909 15.4731461  34.7767520   5.8993729
## 32  90.8822475 111.2209374   3.1099167  6.1240960   9.0143452  74.8032424
## 33  36.1980833  46.7127210  27.8648792  4.4202551  26.6011858  27.4123278
##             26          27         28          29          30          31
## 1   18.3941726  24.6547978  68.968119   4.8357414  23.8747620   3.4554849
## 2   24.6023909  30.2896090  72.938689   8.2209595  20.0021682  10.0975102
## 3   17.9382747  72.9301265 132.496699  34.3155350   6.8638173  36.6518241
## 4   12.4480152  34.0574981  84.236831  10.0851702  20.0620022   7.5154114
## 5   20.6879914  22.2196370  64.894893   3.9618499  26.6826232   2.5718425
## 6    2.3864611 113.2691512 194.779354  60.5016636   3.1175020  57.2098435
## 7   12.2132951  52.9567180 108.183738  20.0379322   6.8120113  21.0200913
## 8   13.5149439 145.4754513 233.447531  84.3525712   6.1466459  82.9698040
## 9   11.7735098 143.3695970 231.396514  82.6683411   5.6979458  80.8823184
## 10   8.4116112  41.8257719  95.466918  12.9124360  10.4017779  11.7612388
## 11  81.4151014   0.7797699  13.262343   6.7688995  84.3618171   9.0572990
## 12   2.5047313  89.7851853 162.304985  43.4539812   0.1619747  41.9248674
## 13  12.4284445 150.0424988 240.372334  87.7177864   7.2099473  85.4587660
## 14   9.6723868 144.0933542 233.548891  83.2084900   6.4048687  80.4277143
## 15 103.4795178   0.9414863   5.835679  14.1592319 108.7964824  16.4058690
## 16   7.3419552 136.6293157 224.417908  77.6244859   5.4025969  74.5857569
## 17  62.1180974   1.9327317  22.135947   2.2303427  67.2504188   3.1448851
## 18   8.8002086  44.9864952  99.273891  14.6354511   8.3160511  14.1879378
## 19  22.4452504 188.2356198 288.833062 117.4532801  17.6772572 114.1238872
## 20  89.9329066   0.6534950  10.953675  10.8473929  99.4376196  11.3133377
## 21 116.4875079   2.4680781   3.600103  19.8339421 124.5394882  21.5792844
## 22   7.1085601  98.8679685 172.390746  50.1453244   0.4830476  49.8446909
## 23  14.2381887  43.7141373  95.040164  14.5185172  10.0750633  15.4731461
## 24   0.7799218  80.2625198 150.410158  37.0531360   1.5612237  34.7767520
## 25  73.1624224   0.8986299  17.229697   5.7025898  81.7647462   5.8993729
## 26   0.0000000  85.5941413 158.411095  41.2861801   3.9395990  37.7945660
## 27  85.5941413   0.0000000  11.178677   8.3148406  91.9435449   9.6732033
## 28 158.4110950  11.1786775   0.000000  38.1683822 164.4945840  41.6371450
## 29  41.2861801   8.3148406  38.168382   0.0000000  45.0084319   0.4734023
## 30   3.9395990  91.9435449 164.494584  45.0084319   0.0000000  43.9684572
## 31  37.7945660   9.6732033  41.637145   0.4734023  43.9684572   0.0000000
## 32  15.0457895  81.1150292 144.765436  39.2538584   4.4749098  41.0644749
## 33  34.2696580  28.8455288  67.073765   9.4934344  26.9186055  12.6185935
##             32         33
## 1   25.1910564 11.1631791
## 2   12.3202056  0.8992753
## 3    0.3861232 11.1870745
## 4   25.4233484 17.1427515
## 5   27.7888252 11.6597366
## 6   14.9405351 45.5304696
## 7    3.3497843  6.6561563
## 8   13.7680957 55.3621479
## 9   14.4117366 55.4903188
## 10  12.4477618 10.6369416
## 11  71.2203632 21.9621310
## 12   5.9477644 27.6416578
## 13  17.5279469 60.8541043
## 14  17.9998359 59.4547080
## 15  94.6726029 35.6854054
## 16  17.4507685 56.4073614
## 17  58.9173386 17.5017736
## 18   8.5629275  8.5969121
## 19  32.3549392 87.2093651
## 20  90.8822475 36.1980833
## 21 111.2209374 46.7127210
## 22   3.1099167 27.8648792
## 23   6.1240960  4.4202551
## 24   9.0143452 26.6011858
## 25  74.8032424 27.4123278
## 26  15.0457895 34.2696580
## 27  81.1150292 28.8455288
## 28 144.7654360 67.0737651
## 29  39.2538584  9.4934344
## 30   4.4749098 26.9186055
## 31  41.0644749 12.6185935
## 32   0.0000000 15.2411299
## 33  15.2411299  0.0000000
apply(X,2,mean)
##       lon           
##  31.69643 -31.11060
X
##            lon          
##  [1,] 30.52414 -32.29530
##  [2,] 30.73229 -29.75666
##  [3,] 33.52208 -28.55392
##  [4,] 31.29433 -32.96366
##  [5,] 30.25909 -32.38379
##  [6,] 36.23101 -32.00230
##  [7,] 32.62579 -29.85714
##  [8,] 37.54996 -30.14809
##  [9,] 37.49985 -30.46986
## [10,] 32.05878 -31.65180
## [11,] 25.93765 -30.91031
## [12,] 35.04177 -31.02653
## [13,] 37.80134 -30.73709
## [14,] 37.58438 -31.19996
## [15,] 24.71032 -31.31748
## [16,] 37.27841 -31.48958
## [17,] 26.97938 -31.63570
## [18,] 32.26563 -31.05377
## [19,] 39.29732 -31.09290
## [20,] 25.32008 -32.48417
## [21,] 24.03159 -31.90604
## [22,] 35.38273 -29.98867
## [23,] 31.99397 -30.07133
## [24,] 34.55079 -31.74459
## [25,] 26.25132 -32.40386
## [26,] 34.80277 -32.59101
## [27,] 25.59189 -31.72285
## [28,] 22.30226 -31.12534
## [29,] 28.46797 -31.51560
## [30,] 35.11829 -30.63141
## [31,] 28.66923 -32.17355
## [32,] 34.10249 -28.77586
## [33,] 30.20331 -28.96961
sweep(X,2,apply(X,2,mean)) # Return an array obtained from an input array by sweeping out a summary statistic.
##              lon            
##  [1,] -1.1722946 -1.18470529
##  [2,] -0.9641429  1.35393515
##  [3,]  1.8256535  2.55667604
##  [4,] -0.4020987 -1.85305785
##  [5,] -1.4373407 -1.27319014
##  [6,]  4.5345839 -0.89170131
##  [7,]  0.9293633  1.25345675
##  [8,]  5.8535314  0.96251089
##  [9,]  5.8034204  0.64073357
## [10,]  0.3623498 -0.54120352
## [11,] -5.7587775  0.20028757
## [12,]  3.3453404  0.08406815
## [13,]  6.1049100  0.37350736
## [14,]  5.8879505 -0.08935937
## [15,] -6.9861119 -0.20687764
## [16,]  5.5819818 -0.37898026
## [17,] -4.7170514 -0.52510492
## [18,]  0.5691976  0.05682463
## [19,]  7.6008846  0.01769395
## [20,] -6.3763527 -1.37357329
## [21,] -7.6648386 -0.79544530
## [22,]  3.6862974  1.12193122
## [23,]  0.2975359  1.03926631
## [24,]  2.8543641 -0.63399469
## [25,] -5.4451142 -1.29326347
## [26,]  3.1063416 -1.48041607
## [27,] -6.1045447 -0.61224854
## [28,] -9.3941738 -0.01473883
## [29,] -3.2284557 -0.40499835
## [30,]  3.4218560  0.47918841
## [31,] -3.0271962 -1.06294741
## [32,]  2.4060551  2.33473640
## [33,] -1.4931233  2.14098983
n=4
diag(rep(1,n))-(1/n)
##       [,1]  [,2]  [,3]  [,4]
## [1,]  0.75 -0.25 -0.25 -0.25
## [2,] -0.25  0.75 -0.25 -0.25
## [3,] -0.25 -0.25  0.75 -0.25
## [4,] -0.25 -0.25 -0.25  0.75
matrix(1, nrow = n, ncol = n)
##      [,1] [,2] [,3] [,4]
## [1,]    1    1    1    1
## [2,]    1    1    1    1
## [3,]    1    1    1    1
## [4,]    1    1    1    1
diag(rep(1,n))-(1/n) * matrix(1, nrow = n, ncol = n)
##       [,1]  [,2]  [,3]  [,4]
## [1,]  0.75 -0.25 -0.25 -0.25
## [2,] -0.25  0.75 -0.25 -0.25
## [3,] -0.25 -0.25  0.75 -0.25
## [4,] -0.25 -0.25 -0.25  0.75
## -----------------------------------------------------------------------------
n = nrow(X)
H = diag(rep(1,n))-(1/n) * matrix(1, nrow = n, ncol = n)
Xc = sweep(X,2,apply(X,2,mean))
Xc[1:2, ]
##             lon          
## [1,] -1.1722946 -1.184705
## [2,] -0.9641429  1.353935
HX = H %*% X
HX[1:2, ]
##             lon          
## [1,] -1.1722946 -1.184705
## [2,] -0.9641429  1.353935
apply(HX, 2, mean)
##           lon               
## -1.773094e-15  2.227188e-15
## -----------------------------------------------------------------------------
B0 = H  %*% DdotD %*% H
B2 = HX %*% t(HX)
B2[1:3, 1:3] / B0[1:3, 1:3]
##      [,1] [,2] [,3]
## [1,] -0.5 -0.5 -0.5
## [2,] -0.5 -0.5 -0.5
## [3,] -0.5 -0.5 -0.5
max(abs(-0.5 * B0 - B2))
## [1] 1.207923e-13
## -----------------------------------------------------------------------------
ekm = read.table("../data/ekman.txt", header=TRUE)
rownames(ekm) = colnames(ekm)
disekm = 1 - ekm - diag(1, ncol(ekm))
disekm[1:5, 1:5]
##      w434 w445 w465 w472 w490
## w434 0.00 0.14 0.58 0.58 0.82
## w445 0.14 0.00 0.50 0.56 0.78
## w465 0.58 0.50 0.00 0.19 0.53
## w472 0.58 0.56 0.19 0.00 0.46
## w490 0.82 0.78 0.53 0.46 0.00
disekm = as.dist(disekm)
## -----------------------------------------------------------------------------
mdsekm = cmdscale(disekm, eig = TRUE)
plotscree(mdsekm)

## -----------------------------------------------------------------------------
dfekm = mdsekm$points[, 1:2] |>
  `colnames<-`(paste0("MDS", 1:2)) |>
  as_tibble() |>
  mutate(
    name = rownames(ekm),
    rgb = photobiology::w_length2rgb(as.numeric(sub("w", "", name))))
ggplot(dfekm, aes(x = MDS1, y = MDS2)) +
  geom_point(col = dfekm$rgb, size = 4) +
  geom_text_repel(aes(label = name)) + coord_fixed()

dim(stress)
## [1] 100   4
stress
##             [,1]       [,2]       [,3]        [,4]
##   [1,] 0.2564344 0.02310251 0.01244422 0.003105991
##   [2,] 0.2564337 0.02310251 0.01244420 0.002708738
##   [3,] 0.2567348 0.02310251 0.01244422 0.002651645
##   [4,] 0.2568435 0.02310251 0.01244414 0.002867260
##   [5,] 0.2564709 0.02310251 0.01244451 0.003273426
##   [6,] 0.2684059 0.02310251 0.01244596 0.003217022
##   [7,] 0.2564709 0.02310251 0.01244421 0.002744531
##   [8,] 0.2660000 0.02310251 0.01244449 0.003343448
##   [9,] 0.2568456 0.02310251 0.01253515 0.003606675
##  [10,] 0.2563815 0.02310251 0.01244423 0.003507884
##  [11,] 0.2721258 0.02310251 0.01244467 0.002898068
##  [12,] 0.2705604 0.02310251 0.01244417 0.003083802
##  [13,] 0.2564337 0.02310251 0.01244432 0.003718418
##  [14,] 0.2564337 0.02310251 0.01244449 0.002763495
##  [15,] 0.2567348 0.02310251 0.01244427 0.003088338
##  [16,] 0.2567348 0.02310251 0.01244414 0.003487105
##  [17,] 0.2665161 0.02310251 0.01244427 0.002644791
##  [18,] 0.2665161 0.02310251 0.01244641 0.002798770
##  [19,] 0.2563948 0.02310251 0.01244551 0.002673181
##  [20,] 0.2671614 0.02310251 0.01244429 0.003172974
##  [21,] 0.2721258 0.02310251 0.01250509 0.002742815
##  [22,] 0.2564344 0.02310251 0.01244417 0.002913404
##  [23,] 0.2564337 0.02310251 0.01244415 0.002859349
##  [24,] 0.2664448 0.02310251 0.01244423 0.002947876
##  [25,] 0.2563948 0.02310251 0.01244431 0.002696052
##  [26,] 0.2563815 0.02310251 0.01249460 0.002679122
##  [27,] 0.2564709 0.02310251 0.01244416 0.003006277
##  [28,] 0.2564344 0.02310251 0.01244421 0.002975636
##  [29,] 0.2563948 0.02310251 0.01244426 0.002665067
##  [30,] 0.2688540 0.02310251 0.01244428 0.002839643
##  [31,] 0.2567348 0.02310251 0.01244419 0.002837803
##  [32,] 0.2563815 0.02310251 0.01244431 0.002666064
##  [33,] 0.2564344 0.02310251 0.01244417 0.002641424
##  [34,] 0.2646034 0.02310251 0.01244430 0.002726495
##  [35,] 0.2567622 0.02310251 0.01244429 0.003056776
##  [36,] 0.2567348 0.02310251 0.01253515 0.002621917
##  [37,] 0.2564709 0.02310251 0.01244419 0.002771835
##  [38,] 0.2599293 0.02310251 0.01244458 0.003328291
##  [39,] 0.2566013 0.02310251 0.01244436 0.002740097
##  [40,] 0.2564337 0.02310251 0.01244450 0.002620992
##  [41,] 0.2563948 0.02310251 0.01244433 0.003000842
##  [42,] 0.2645525 0.02310251 0.01244497 0.002819788
##  [43,] 0.2567348 0.02310251 0.01244552 0.003533963
##  [44,] 0.2567495 0.02310251 0.01244425 0.002795333
##  [45,] 0.2599882 0.02310251 0.01244422 0.002757198
##  [46,] 0.2566766 0.02310251 0.01244424 0.002839389
##  [47,] 0.2567348 0.02310251 0.01244422 0.002838302
##  [48,] 0.2568435 0.02310251 0.01253479 0.003294939
##  [49,] 0.2564337 0.02310251 0.01244430 0.003597160
##  [50,] 0.2563948 0.02310251 0.01244435 0.002840022
##  [51,] 0.2564337 0.02310251 0.01244424 0.003496745
##  [52,] 0.2563815 0.02310251 0.01244423 0.002670635
##  [53,] 0.2690747 0.02310251 0.01244422 0.002911839
##  [54,] 0.2671614 0.02310251 0.01244429 0.003393294
##  [55,] 0.2564337 0.02310251 0.01244422 0.002652747
##  [56,] 0.2564337 0.02310251 0.01244420 0.002800906
##  [57,] 0.2564344 0.02310251 0.01244424 0.002836812
##  [58,] 0.2564337 0.02310251 0.01244421 0.003562442
##  [59,] 0.2645525 0.02310251 0.01244421 0.003407879
##  [60,] 0.2566013 0.02310251 0.01244418 0.002753160
##  [61,] 0.2563815 0.02310251 0.01244494 0.002595412
##  [62,] 0.2563815 0.02310251 0.01244429 0.003498198
##  [63,] 0.2671614 0.02310251 0.01244431 0.003018693
##  [64,] 0.2563948 0.02310251 0.01244420 0.003247601
##  [65,] 0.2563948 0.02310251 0.01244421 0.003246640
##  [66,] 0.2688540 0.02310251 0.01244435 0.003143784
##  [67,] 0.2567622 0.02310251 0.01244415 0.002851555
##  [68,] 0.2567622 0.02310251 0.01244417 0.003406754
##  [69,] 0.2567622 0.02310251 0.01244422 0.002756205
##  [70,] 0.2567622 0.02310251 0.01244431 0.002790451
##  [71,] 0.2567622 0.02310251 0.01244429 0.003356577
##  [72,] 0.2680470 0.02310251 0.01244421 0.002696757
##  [73,] 0.2601376 0.02310251 0.01244419 0.002748391
##  [74,] 0.2563948 0.02310251 0.01244421 0.002657489
##  [75,] 0.2646034 0.02310251 0.01244428 0.003284028
##  [76,] 0.2563815 0.02310251 0.01244423 0.003311805
##  [77,] 0.2563948 0.02310251 0.01244421 0.002780885
##  [78,] 0.2564709 0.02310251 0.01244423 0.003265715
##  [79,] 0.2668051 0.02310251 0.01244433 0.003100225
##  [80,] 0.2563815 0.02310251 0.01245065 0.002676644
##  [81,] 0.2648963 0.02310251 0.01253469 0.002734583
##  [82,] 0.2665161 0.02310251 0.01244411 0.002718548
##  [83,] 0.2568435 0.02310251 0.01244419 0.002737317
##  [84,] 0.2565841 0.02310251 0.01244414 0.003107693
##  [85,] 0.2599423 0.02310251 0.01244413 0.002776348
##  [86,] 0.2563948 0.02310251 0.01245989 0.002694656
##  [87,] 0.2564709 0.02310251 0.01244487 0.003266339
##  [88,] 0.2564344 0.02310251 0.01245031 0.002951356
##  [89,] 0.2567348 0.02310251 0.01244414 0.003240187
##  [90,] 0.2563815 0.02310251 0.01244450 0.003606639
##  [91,] 0.2567622 0.02310251 0.01244414 0.002876276
##  [92,] 0.2618162 0.02310251 0.01244474 0.002713966
##  [93,] 0.2563948 0.02310251 0.01244438 0.002753725
##  [94,] 0.2675244 0.02310251 0.01244426 0.003497540
##  [95,] 0.2564709 0.02310251 0.01244422 0.002667266
##  [96,] 0.2564709 0.02310251 0.01244442 0.002967706
##  [97,] 0.2567495 0.02310251 0.01244416 0.003459355
##  [98,] 0.2669988 0.02310251 0.01244422 0.002875183
##  [99,] 0.2567348 0.02310251 0.01244435 0.002643288
## [100,] 0.2563815 0.02310251 0.01244425 0.003239758
## -----------------------------------------------------------------------------
dfstr = reshape2::melt(stress, varnames = c("replicate","dimensions"))
dfstr
##     replicate dimensions       value
## 1           1          1 0.256434433
## 2           2          1 0.256433681
## 3           3          1 0.256734766
## 4           4          1 0.256843486
## 5           5          1 0.256470858
## 6           6          1 0.268405905
## 7           7          1 0.256470858
## 8           8          1 0.265999985
## 9           9          1 0.256845647
## 10         10          1 0.256381465
## 11         11          1 0.272125774
## 12         12          1 0.270560362
## 13         13          1 0.256433681
## 14         14          1 0.256433681
## 15         15          1 0.256734766
## 16         16          1 0.256734766
## 17         17          1 0.266516103
## 18         18          1 0.266516103
## 19         19          1 0.256394793
## 20         20          1 0.267161436
## 21         21          1 0.272125774
## 22         22          1 0.256434433
## 23         23          1 0.256433681
## 24         24          1 0.266444849
## 25         25          1 0.256394793
## 26         26          1 0.256381465
## 27         27          1 0.256470858
## 28         28          1 0.256434433
## 29         29          1 0.256394793
## 30         30          1 0.268853953
## 31         31          1 0.256734766
## 32         32          1 0.256381465
## 33         33          1 0.256434433
## 34         34          1 0.264603406
## 35         35          1 0.256762244
## 36         36          1 0.256734766
## 37         37          1 0.256470858
## 38         38          1 0.259929280
## 39         39          1 0.256601284
## 40         40          1 0.256433681
## 41         41          1 0.256394793
## 42         42          1 0.264552504
## 43         43          1 0.256734766
## 44         44          1 0.256749537
## 45         45          1 0.259988227
## 46         46          1 0.256676573
## 47         47          1 0.256734766
## 48         48          1 0.256843486
## 49         49          1 0.256433681
## 50         50          1 0.256394793
## 51         51          1 0.256433681
## 52         52          1 0.256381465
## 53         53          1 0.269074711
## 54         54          1 0.267161436
## 55         55          1 0.256433681
## 56         56          1 0.256433681
## 57         57          1 0.256434433
## 58         58          1 0.256433681
## 59         59          1 0.264552504
## 60         60          1 0.256601284
## 61         61          1 0.256381465
## 62         62          1 0.256381465
## 63         63          1 0.267161436
## 64         64          1 0.256394793
## 65         65          1 0.256394793
## 66         66          1 0.268853953
## 67         67          1 0.256762244
## 68         68          1 0.256762244
## 69         69          1 0.256762244
## 70         70          1 0.256762244
## 71         71          1 0.256762244
## 72         72          1 0.268046999
## 73         73          1 0.260137582
## 74         74          1 0.256394793
## 75         75          1 0.264603406
## 76         76          1 0.256381465
## 77         77          1 0.256394793
## 78         78          1 0.256470858
## 79         79          1 0.266805072
## 80         80          1 0.256381465
## 81         81          1 0.264896306
## 82         82          1 0.266516103
## 83         83          1 0.256843486
## 84         84          1 0.256584099
## 85         85          1 0.259942331
## 86         86          1 0.256394793
## 87         87          1 0.256470858
## 88         88          1 0.256434433
## 89         89          1 0.256734766
## 90         90          1 0.256381470
## 91         91          1 0.256762244
## 92         92          1 0.261816201
## 93         93          1 0.256394793
## 94         94          1 0.267524440
## 95         95          1 0.256470858
## 96         96          1 0.256470858
## 97         97          1 0.256749537
## 98         98          1 0.266998761
## 99         99          1 0.256734766
## 100       100          1 0.256381465
## 101         1          2 0.023102506
## 102         2          2 0.023102506
## 103         3          2 0.023102506
## 104         4          2 0.023102506
## 105         5          2 0.023102506
## 106         6          2 0.023102506
## 107         7          2 0.023102506
## 108         8          2 0.023102506
## 109         9          2 0.023102506
## 110        10          2 0.023102506
## 111        11          2 0.023102506
## 112        12          2 0.023102506
## 113        13          2 0.023102506
## 114        14          2 0.023102506
## 115        15          2 0.023102506
## 116        16          2 0.023102506
## 117        17          2 0.023102506
## 118        18          2 0.023102506
## 119        19          2 0.023102506
## 120        20          2 0.023102506
## 121        21          2 0.023102506
## 122        22          2 0.023102506
## 123        23          2 0.023102506
## 124        24          2 0.023102506
## 125        25          2 0.023102506
## 126        26          2 0.023102506
## 127        27          2 0.023102506
## 128        28          2 0.023102506
## 129        29          2 0.023102506
## 130        30          2 0.023102506
## 131        31          2 0.023102506
## 132        32          2 0.023102506
## 133        33          2 0.023102506
## 134        34          2 0.023102506
## 135        35          2 0.023102506
## 136        36          2 0.023102506
## 137        37          2 0.023102506
## 138        38          2 0.023102506
## 139        39          2 0.023102506
## 140        40          2 0.023102506
## 141        41          2 0.023102506
## 142        42          2 0.023102506
## 143        43          2 0.023102506
## 144        44          2 0.023102506
## 145        45          2 0.023102506
## 146        46          2 0.023102506
## 147        47          2 0.023102506
## 148        48          2 0.023102506
## 149        49          2 0.023102506
## 150        50          2 0.023102506
## 151        51          2 0.023102506
## 152        52          2 0.023102506
## 153        53          2 0.023102506
## 154        54          2 0.023102506
## 155        55          2 0.023102506
## 156        56          2 0.023102506
## 157        57          2 0.023102506
## 158        58          2 0.023102506
## 159        59          2 0.023102506
## 160        60          2 0.023102506
## 161        61          2 0.023102506
## 162        62          2 0.023102506
## 163        63          2 0.023102506
## 164        64          2 0.023102506
## 165        65          2 0.023102506
## 166        66          2 0.023102506
## 167        67          2 0.023102506
## 168        68          2 0.023102506
## 169        69          2 0.023102506
## 170        70          2 0.023102506
## 171        71          2 0.023102506
## 172        72          2 0.023102506
## 173        73          2 0.023102506
## 174        74          2 0.023102506
## 175        75          2 0.023102506
## 176        76          2 0.023102506
## 177        77          2 0.023102506
## 178        78          2 0.023102506
## 179        79          2 0.023102506
## 180        80          2 0.023102506
## 181        81          2 0.023102506
## 182        82          2 0.023102506
## 183        83          2 0.023102506
## 184        84          2 0.023102506
## 185        85          2 0.023102506
## 186        86          2 0.023102506
## 187        87          2 0.023102506
## 188        88          2 0.023102506
## 189        89          2 0.023102506
## 190        90          2 0.023102506
## 191        91          2 0.023102506
## 192        92          2 0.023102506
## 193        93          2 0.023102506
## 194        94          2 0.023102506
## 195        95          2 0.023102506
## 196        96          2 0.023102506
## 197        97          2 0.023102506
## 198        98          2 0.023102506
## 199        99          2 0.023102506
## 200       100          2 0.023102506
## 201         1          3 0.012444222
## 202         2          3 0.012444203
## 203         3          3 0.012444224
## 204         4          3 0.012444136
## 205         5          3 0.012444513
## 206         6          3 0.012445962
## 207         7          3 0.012444207
## 208         8          3 0.012444495
## 209         9          3 0.012535153
## 210        10          3 0.012444226
## 211        11          3 0.012444667
## 212        12          3 0.012444170
## 213        13          3 0.012444316
## 214        14          3 0.012444494
## 215        15          3 0.012444267
## 216        16          3 0.012444144
## 217        17          3 0.012444273
## 218        18          3 0.012446411
## 219        19          3 0.012445505
## 220        20          3 0.012444290
## 221        21          3 0.012505088
## 222        22          3 0.012444174
## 223        23          3 0.012444147
## 224        24          3 0.012444235
## 225        25          3 0.012444314
## 226        26          3 0.012494603
## 227        27          3 0.012444164
## 228        28          3 0.012444210
## 229        29          3 0.012444265
## 230        30          3 0.012444282
## 231        31          3 0.012444193
## 232        32          3 0.012444305
## 233        33          3 0.012444172
## 234        34          3 0.012444302
## 235        35          3 0.012444290
## 236        36          3 0.012535153
## 237        37          3 0.012444194
## 238        38          3 0.012444575
## 239        39          3 0.012444362
## 240        40          3 0.012444505
## 241        41          3 0.012444328
## 242        42          3 0.012444969
## 243        43          3 0.012445516
## 244        44          3 0.012444249
## 245        45          3 0.012444224
## 246        46          3 0.012444241
## 247        47          3 0.012444216
## 248        48          3 0.012534792
## 249        49          3 0.012444303
## 250        50          3 0.012444353
## 251        51          3 0.012444243
## 252        52          3 0.012444227
## 253        53          3 0.012444225
## 254        54          3 0.012444290
## 255        55          3 0.012444218
## 256        56          3 0.012444199
## 257        57          3 0.012444236
## 258        58          3 0.012444208
## 259        59          3 0.012444211
## 260        60          3 0.012444176
## 261        61          3 0.012444943
## 262        62          3 0.012444294
## 263        63          3 0.012444314
## 264        64          3 0.012444197
## 265        65          3 0.012444210
## 266        66          3 0.012444347
## 267        67          3 0.012444153
## 268        68          3 0.012444173
## 269        69          3 0.012444221
## 270        70          3 0.012444309
## 271        71          3 0.012444287
## 272        72          3 0.012444206
## 273        73          3 0.012444190
## 274        74          3 0.012444214
## 275        75          3 0.012444281
## 276        76          3 0.012444229
## 277        77          3 0.012444214
## 278        78          3 0.012444229
## 279        79          3 0.012444328
## 280        80          3 0.012450653
## 281        81          3 0.012534687
## 282        82          3 0.012444111
## 283        83          3 0.012444186
## 284        84          3 0.012444135
## 285        85          3 0.012444135
## 286        86          3 0.012459894
## 287        87          3 0.012444871
## 288        88          3 0.012450311
## 289        89          3 0.012444138
## 290        90          3 0.012444505
## 291        91          3 0.012444143
## 292        92          3 0.012444738
## 293        93          3 0.012444378
## 294        94          3 0.012444263
## 295        95          3 0.012444222
## 296        96          3 0.012444422
## 297        97          3 0.012444161
## 298        98          3 0.012444218
## 299        99          3 0.012444349
## 300       100          3 0.012444254
## 301         1          4 0.003105991
## 302         2          4 0.002708738
## 303         3          4 0.002651645
## 304         4          4 0.002867260
## 305         5          4 0.003273426
## 306         6          4 0.003217022
## 307         7          4 0.002744531
## 308         8          4 0.003343448
## 309         9          4 0.003606675
## 310        10          4 0.003507884
## 311        11          4 0.002898068
## 312        12          4 0.003083802
## 313        13          4 0.003718418
## 314        14          4 0.002763495
## 315        15          4 0.003088338
## 316        16          4 0.003487105
## 317        17          4 0.002644791
## 318        18          4 0.002798770
## 319        19          4 0.002673181
## 320        20          4 0.003172974
## 321        21          4 0.002742815
## 322        22          4 0.002913404
## 323        23          4 0.002859349
## 324        24          4 0.002947876
## 325        25          4 0.002696052
## 326        26          4 0.002679122
## 327        27          4 0.003006277
## 328        28          4 0.002975636
## 329        29          4 0.002665067
## 330        30          4 0.002839643
## 331        31          4 0.002837803
## 332        32          4 0.002666064
## 333        33          4 0.002641424
## 334        34          4 0.002726495
## 335        35          4 0.003056776
## 336        36          4 0.002621917
## 337        37          4 0.002771835
## 338        38          4 0.003328291
## 339        39          4 0.002740097
## 340        40          4 0.002620992
## 341        41          4 0.003000842
## 342        42          4 0.002819788
## 343        43          4 0.003533963
## 344        44          4 0.002795333
## 345        45          4 0.002757198
## 346        46          4 0.002839389
## 347        47          4 0.002838302
## 348        48          4 0.003294939
## 349        49          4 0.003597160
## 350        50          4 0.002840022
## 351        51          4 0.003496745
## 352        52          4 0.002670635
## 353        53          4 0.002911839
## 354        54          4 0.003393294
## 355        55          4 0.002652747
## 356        56          4 0.002800906
## 357        57          4 0.002836812
## 358        58          4 0.003562442
## 359        59          4 0.003407879
## 360        60          4 0.002753160
## 361        61          4 0.002595412
## 362        62          4 0.003498198
## 363        63          4 0.003018693
## 364        64          4 0.003247601
## 365        65          4 0.003246640
## 366        66          4 0.003143784
## 367        67          4 0.002851555
## 368        68          4 0.003406754
## 369        69          4 0.002756205
## 370        70          4 0.002790451
## 371        71          4 0.003356577
## 372        72          4 0.002696757
## 373        73          4 0.002748391
## 374        74          4 0.002657489
## 375        75          4 0.003284028
## 376        76          4 0.003311805
## 377        77          4 0.002780885
## 378        78          4 0.003265715
## 379        79          4 0.003100225
## 380        80          4 0.002676644
## 381        81          4 0.002734583
## 382        82          4 0.002718548
## 383        83          4 0.002737317
## 384        84          4 0.003107693
## 385        85          4 0.002776348
## 386        86          4 0.002694656
## 387        87          4 0.003266339
## 388        88          4 0.002951356
## 389        89          4 0.003240187
## 390        90          4 0.003606639
## 391        91          4 0.002876276
## 392        92          4 0.002713966
## 393        93          4 0.002753725
## 394        94          4 0.003497540
## 395        95          4 0.002667266
## 396        96          4 0.002967706
## 397        97          4 0.003459355
## 398        98          4 0.002875183
## 399        99          4 0.002643288
## 400       100          4 0.003239758
library("ggplot2")
ggplot(dfstr, aes(y = value, x = dimensions, group = dimensions)) +
  geom_boxplot()

## -----------------------------------------------------------------------------
nmdsk2 = metaMDS(disekm, k = 2, autotransform = FALSE)
## Run 0 stress 0.02310251 
## Run 1 stress 0.02310251 
## ... New best solution
## ... Procrustes: rmse 2.261024e-06  max resid 4.035376e-06 
## ... Similar to previous best
## Run 2 stress 0.02310251 
## ... Procrustes: rmse 2.288581e-06  max resid 4.010604e-06 
## ... Similar to previous best
## Run 3 stress 0.02310251 
## ... New best solution
## ... Procrustes: rmse 8.855765e-07  max resid 2.357504e-06 
## ... Similar to previous best
## Run 4 stress 0.02310251 
## ... Procrustes: rmse 1.676326e-06  max resid 2.74728e-06 
## ... Similar to previous best
## Run 5 stress 0.02310251 
## ... Procrustes: rmse 1.433153e-06  max resid 2.447657e-06 
## ... Similar to previous best
## Run 6 stress 0.02310251 
## ... Procrustes: rmse 3.523568e-06  max resid 6.102494e-06 
## ... Similar to previous best
## Run 7 stress 0.02310251 
## ... Procrustes: rmse 3.330735e-06  max resid 5.309519e-06 
## ... Similar to previous best
## Run 8 stress 0.02310251 
## ... Procrustes: rmse 2.275031e-06  max resid 3.567351e-06 
## ... Similar to previous best
## Run 9 stress 0.02310251 
## ... New best solution
## ... Procrustes: rmse 1.042435e-06  max resid 1.695406e-06 
## ... Similar to previous best
## Run 10 stress 0.02310251 
## ... Procrustes: rmse 1.989542e-06  max resid 3.352387e-06 
## ... Similar to previous best
## Run 11 stress 0.02310251 
## ... Procrustes: rmse 1.114155e-06  max resid 1.686595e-06 
## ... Similar to previous best
## Run 12 stress 0.02310251 
## ... Procrustes: rmse 1.028951e-06  max resid 1.76672e-06 
## ... Similar to previous best
## Run 13 stress 0.02310251 
## ... Procrustes: rmse 2.026597e-06  max resid 3.485932e-06 
## ... Similar to previous best
## Run 14 stress 0.02310251 
## ... Procrustes: rmse 2.134079e-06  max resid 3.604351e-06 
## ... Similar to previous best
## Run 15 stress 0.02310251 
## ... Procrustes: rmse 2.240369e-06  max resid 3.664711e-06 
## ... Similar to previous best
## Run 16 stress 0.02310251 
## ... Procrustes: rmse 1.980293e-06  max resid 3.736667e-06 
## ... Similar to previous best
## Run 17 stress 0.02310251 
## ... Procrustes: rmse 1.484197e-06  max resid 2.537041e-06 
## ... Similar to previous best
## Run 18 stress 0.02310251 
## ... Procrustes: rmse 1.714337e-06  max resid 2.964587e-06 
## ... Similar to previous best
## Run 19 stress 0.02310251 
## ... Procrustes: rmse 5.208182e-06  max resid 8.464009e-06 
## ... Similar to previous best
## Run 20 stress 0.02310251 
## ... Procrustes: rmse 2.465314e-06  max resid 4.204134e-06 
## ... Similar to previous best
## *** Solution reached
nmdsk2
## 
## Call:
## metaMDS(comm = disekm, k = 2, autotransform = FALSE) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     disekm 
## Distance: user supplied 
## 
## Dimensions: 2 
## Stress:     0.02310251 
## Stress type 1, weak ties
## Two convergent solutions found after 20 tries
## Scaling: centring, PC rotation 
## Species: scores missing
nmdsk2$points
##            MDS1        MDS2
## w434 -0.1531437 -0.43529207
## w445 -0.1778139 -0.39469836
## w465 -0.4343192 -0.27291116
## w472 -0.4656931 -0.24265893
## w490 -0.4767777  0.06396620
## w504 -0.3992916  0.31370056
## w537 -0.2638100  0.41606649
## w555 -0.1493467  0.47154465
## w584  0.2711683  0.35511499
## w600  0.4003808  0.18752334
## w610  0.4948220  0.03556897
## w628  0.4855930 -0.10962294
## w651  0.4411678 -0.16687544
## w674  0.4270639 -0.22142630
## attr(,"centre")
## [1] TRUE
## attr(,"pc")
## [1] TRUE
## attr(,"halfchange")
## [1] FALSE
## attr(,"internalscaling")
## [1] 2.07271
stressplot(nmdsk2, pch = 20)

## -----------------------------------------------------------------------------
ggplot(dfekm, aes(x = MDS1, y = MDS2)) +
  geom_point(col = dfekm$rgb, size = 4) +
  geom_text_repel(aes(label = name)) + coord_fixed()

nmdsk2$points[, 1:2] |> 
  `colnames<-`(paste0("NmMDS", 1:2)) |>
  as_tibble() |> 
  bind_cols(dplyr::select(dfekm, rgb, name)) |>
  ggplot(aes(x = NmMDS1, y = NmMDS2)) +
    geom_point(col = dfekm$rgb, size = 4) +
    geom_text_repel(aes(label = name))

## -----------------------------------------------------------------------------
IBDchip = readRDS("../data/vsn28Exprd.rds")
library("ade4")
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("sva")
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-39. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
## -----------------------------------------------------------------------------
class(IBDchip)
## [1] "matrix" "array"
dim(IBDchip)
## [1] 8635   28
tail(IBDchip[,1:3])
##                                  20CF     20DF     20MF
## bm-026.1.sig_st              7.299308 7.275802 7.383103
## bm-125.1.sig_st              8.538857 8.998562 9.296096
## bru.tab.d.HIII.Con32.sig_st  6.802736 6.777566 6.859950
## bru.tab.d.HIII.Con323.sig_st 6.463604 6.501139 6.611851
## bru.tab.d.HIII.Con5.sig_st   5.739235 5.666060 5.831079
## day                          2.000000 2.000000 2.000000
table(IBDchip[nrow(IBDchip), ])
## 
##  1  2  3 
##  8 16  4
## -----------------------------------------------------------------------------
assayIBD = IBDchip[-nrow(IBDchip), ]
day      = factor(IBDchip[nrow(IBDchip), ])
day
##        20CF        20DF        20MF        20PF          CC          CD 
##           2           2           2           2           1           1 
##          CM    CNTR23CF    CNTR23DF    CNTR23MF    CNTR23PF      CNTRCF 
##           1           2           2           2           2           2 
##      CNTRDF      CNTRMF      CNTRPF          CP IBS1_CntrCb IBS1_CntrPb 
##           2           2           2           1           3           3 
##  IBS1_IBSCb  IBS1_IBSPb        IBSC        IBSD        IBSM        IBSP 
##           3           3           2           2           2           2 
##          IC          ID          IM          IP 
##           1           1           1           1 
## Levels: 1 2 3
## -----------------------------------------------------------------------------
rankthreshPCA = function(x, threshold = 3000) {
  ranksM = apply(x, 2, rank)
  ranksM[ranksM < threshold] = threshold
  ranksM = threshold - ranksM
  dudi.pca(t(ranksM), scannf = FALSE, nf = 2)
}
pcaDay12 = rankthreshPCA(assayIBD[, day != 3])
pcaDay12
## Duality diagramm
## class: pca dudi
## $call: dudi.pca(df = t(ranksM), scannf = FALSE, nf = 2)
## 
## $nf: 2 axis-components saved
## $rank: 23
## eigen values: 1401 1033 586 506.7 378.2 ...
##   vector length mode    content       
## 1 $cw    8634   numeric column weights
## 2 $lw    24     numeric row weights   
## 3 $eig   23     numeric eigen values  
## 
##   data.frame nrow ncol content             
## 1 $tab       24   8634 modified array      
## 2 $li        24   2    row coordinates     
## 3 $l1        24   2    row normed scores   
## 4 $co        8634 2    column coordinates  
## 5 $c1        8634 2    column normed scores
## other elements: cent norm
fviz_eig(pcaDay12, bar_width = 0.6) + ggtitle("")

## -----------------------------------------------------------------------------
day12 = day[ day!=3 ]
rtPCA1 = fviz(pcaDay12, element = "ind", axes = c(1, 2), geom = c("point", "text"),
  habillage = day12, repel = TRUE, palette = "Dark2",
  addEllipses = TRUE, ellipse.type = "convex") + ggtitle("") +
  coord_fixed()
rtPCA1

## -----------------------------------------------------------------------------
pcaDay123 = rankthreshPCA(assayIBD)
fviz(pcaDay123, element = "ind", axes = c(1, 2), geom = c("point", "text"),
  habillage = day, repel = TRUE, palette = "Dark2",
  addEllipses = TRUE, ellipse.type = "convex") + 
  ggtitle("") + coord_fixed()

## -----------------------------------------------------------------------------
rtPCA1

fviz(pcaDay123, element="ind", axes=c(1,2), geom=c("point","text"),
  habillage = day, repel=TRUE, palette = "Dark2",
  addEllipses = TRUE, ellipse.type = "convex") + ggtitle("") +
  coord_fixed()

## -----------------------------------------------------------------------------
fviz_pca_ind(pcaDay123, habillage = day, labelsize = 3,
  palette = "Dark2", addEllipses = TRUE, ellipse.level = 0.69)

## -----------------------------------------------------------------------------
fviz_eig(pcaDay123, bar_width=0.6) + ggtitle("")

model0 = model.matrix(~1, day)
model0
##             (Intercept)
## 20CF                  1
## 20DF                  1
## 20MF                  1
## 20PF                  1
## CC                    1
## CD                    1
## CM                    1
## CNTR23CF              1
## CNTR23DF              1
## CNTR23MF              1
## CNTR23PF              1
## CNTRCF                1
## CNTRDF                1
## CNTRMF                1
## CNTRPF                1
## CP                    1
## IBS1_CntrCb           1
## IBS1_CntrPb           1
## IBS1_IBSCb            1
## IBS1_IBSPb            1
## IBSC                  1
## IBSD                  1
## IBSM                  1
## IBSP                  1
## IC                    1
## ID                    1
## IM                    1
## IP                    1
## attr(,"assign")
## [1] 0
## -----------------------------------------------------------------------------

combatIBD = ComBat(dat = assayIBD, batch = day, mod = model0)
## Found3batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
pcaDayBatRM = rankthreshPCA(combatIBD)
fviz(pcaDayBatRM, element = "ind", geom = c("point", "text"),
  habillage = day, repel=TRUE, palette = "Dark2", addEllipses = TRUE,
  ellipse.type = "convex", axes =c(1,2)) + coord_fixed() + ggtitle("")

library("SummarizedExperiment")
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:ade4':
## 
##     score
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
colnames(IBDchip)
##  [1] "20CF"        "20DF"        "20MF"        "20PF"        "CC"         
##  [6] "CD"          "CM"          "CNTR23CF"    "CNTR23DF"    "CNTR23MF"   
## [11] "CNTR23PF"    "CNTRCF"      "CNTRDF"      "CNTRMF"      "CNTRPF"     
## [16] "CP"          "IBS1_CntrCb" "IBS1_CntrPb" "IBS1_IBSCb"  "IBS1_IBSPb" 
## [21] "IBSC"        "IBSD"        "IBSM"        "IBSP"        "IC"         
## [26] "ID"          "IM"          "IP"
treatment  = factor(ifelse(grepl("Cntr|^C", colnames(IBDchip)), "CTL", "IBS"))
treatment
##  [1] IBS IBS IBS IBS CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL IBS
## [20] IBS IBS IBS IBS IBS IBS IBS IBS IBS
## Levels: CTL IBS
## -----------------------------------------------------------------------------

sampledata = DataFrame(day = day, treatment = treatment)
chipse = SummarizedExperiment(assays  = list(abundance = assayIBD),
                              colData = sampledata)

## -----------------------------------------------------------------------------
# check whether the 'grep'ed treatment status agree with
# a manual 'hardcoded' version provided by Susan previously.
stopifnot(identical(chipse$treatment, factor(c("IBS", "CTL")[
  c(1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1)])))

## -----------------------------------------------------------------------------
chipse[, day == 2]
## class: SummarizedExperiment 
## dim: 8634 16 
## metadata(0):
## assays(1): abundance
## rownames(8634): 01010101000000.2104_gPM_GC 01010101000000.2141_gPM_GC
##   ... bru.tab.d.HIII.Con323.sig_st bru.tab.d.HIII.Con5.sig_st
## rowData names(0):
## colnames(16): 20CF 20DF ... IBSM IBSP
## colData names(2): day treatment
## -----------------------------------------------------------------------------
corese = readRDS("../data/normse.rds")
corese
## class: SummarizedExperiment 
## dim: 1000 747 
## metadata(0):
## assays(3): counts normalizedValues residuals
## rownames(1000): Cbr2 Cyp2f2 ... Rnf13 Atp7b
## rowData names(0):
## colnames(747): OEP01_N706_S501 OEP01_N701_S501 ... OEL23_N704_S503
##   OEL23_N703_S502
## colData names(69): Experiment Batch ... W49 W50
assays(corese)
## List of length 3
## names(3): counts normalizedValues residuals
names(assays(corese))
## [1] "counts"           "normalizedValues" "residuals"
dim(assays(corese)$counts)
## [1] 1000  747
dim(assays(corese)$normalizedValues)
## [1] 1000  747
dim(assays(corese)$residuals)
## [1] 1000  747
head(colData(corese))
## DataFrame with 6 rows and 69 columns
##                      Experiment    Batch publishedClusters    NREADS  NALIGNED
##                        <factor> <factor>         <numeric> <numeric> <numeric>
## OEP01_N706_S501 K5ERRY_UI_96HPT      Y01                 1   3313260   3167600
## OEP01_N701_S501 K5ERRY_UI_96HPT      Y01                 1   2902430   2757790
## OEP01_N707_S507 K5ERRY_UI_96HPT      Y01                 1   2307940   2178350
## OEP01_N705_S501 K5ERRY_UI_96HPT      Y01                 1   3337400   3183720
## OEP01_N709_S501 K5ERRY_UI_96HPT      Y01                -2   3502620   3352230
## OEP01_N702_S505 K5ERRY_UI_96HPT      Y01                 1   2984830   2844350
##                    RALIGN TOTAL_DUP    PRIMER PCT_RIBOSOMAL_BASES
##                 <numeric> <numeric> <numeric>           <numeric>
## OEP01_N706_S501   95.6035   47.9943 0.0154566               2e-06
## OEP01_N701_S501   95.0167   45.0150 0.0182066               0e+00
## OEP01_N707_S507   94.3852   43.7832 0.0219196               0e+00
## OEP01_N705_S501   95.3952   43.2688 0.0183041               2e-06
## OEP01_N709_S501   95.7065   54.6632 0.0165818               1e-06
## OEP01_N702_S505   95.2935   66.3881 0.0190047               0e+00
##                 PCT_CODING_BASES PCT_UTR_BASES PCT_INTRONIC_BASES
##                        <numeric>     <numeric>          <numeric>
## OEP01_N706_S501         0.200130      0.230654           0.404205
## OEP01_N701_S501         0.182461      0.201810           0.465702
## OEP01_N707_S507         0.152627      0.207897           0.511416
## OEP01_N705_S501         0.169514      0.207342           0.457556
## OEP01_N709_S501         0.310654      0.268904           0.283935
## OEP01_N702_S505         0.305311      0.283584           0.211970
##                 PCT_INTERGENIC_BASES PCT_MRNA_BASES MEDIAN_CV_COVERAGE
##                            <numeric>      <numeric>          <numeric>
## OEP01_N706_S501             0.165009       0.430784           0.843857
## OEP01_N701_S501             0.150027       0.384271           0.914370
## OEP01_N707_S507             0.128060       0.360524           0.955405
## OEP01_N705_S501             0.165586       0.376856           0.816630
## OEP01_N709_S501             0.136506       0.579558           0.715861
## OEP01_N702_S505             0.199135       0.588895           1.011250
##                 MEDIAN_5PRIME_BIAS MEDIAN_3PRIME_BIAS     CreER ERCC_reads
##                          <numeric>          <numeric> <numeric>  <numeric>
## OEP01_N706_S501           0.061028           0.521079         1      10516
## OEP01_N701_S501           0.033350           0.373993      3022       9331
## OEP01_N707_S507           0.014606           0.491230      2329       7386
## OEP01_N705_S501           0.101798           0.525238       717       6387
## OEP01_N709_S501           0.136549           0.497042         2       8362
## OEP01_N702_S505           0.013452           0.453206      7007      33321
##                        W1         W2        W3        W4         W5         W6
##                 <numeric>  <numeric> <numeric> <numeric>  <numeric>  <numeric>
## OEP01_N706_S501  0.520220  0.7111960 -0.964288  0.736816 -0.2326359  0.2107019
## OEP01_N701_S501  0.426466  0.1297278 -0.555788  0.245425  0.0141273 -0.0471313
## OEP01_N707_S507  0.702733  0.2162028 -0.376394  0.466649 -0.3101722  0.0305582
## OEP01_N705_S501  0.578357 -0.0396907 -0.357542  0.426910  0.2361442 -0.0049643
## OEP01_N709_S501 -0.622039 -0.3156792  0.530188  1.069105 -1.9034926  0.6185733
## OEP01_N702_S505  0.417916  1.1798491 -0.652745  1.667191 -3.0199206  1.9204409
##                          W7         W8        W9        W10       W11
##                   <numeric>  <numeric> <numeric>  <numeric> <numeric>
## OEP01_N706_S501  0.35886105  0.0107942 -0.278665  0.2682341 -0.310914
## OEP01_N701_S501 -0.01789151  0.0146882 -0.374432 -0.0954823 -0.322615
## OEP01_N707_S507 -0.12752002 -0.5293184 -0.749898 -0.4576471 -0.437090
## OEP01_N705_S501  0.00454701 -0.1874755 -0.216097 -0.1534220 -0.275354
## OEP01_N709_S501 -1.18603972 -0.1784294 -1.138336  0.2595522  0.167002
## OEP01_N702_S505  0.01361619  1.0272006  2.622282 -1.8732545  1.202000
##                        W12         W13        W14         W15       W16
##                  <numeric>   <numeric>  <numeric>   <numeric> <numeric>
## OEP01_N706_S501 -0.0972732  0.00386093  0.5995104 -0.00847401  0.173571
## OEP01_N701_S501  0.0220218 -0.00131921  0.2645274  0.11864449 -0.148296
## OEP01_N707_S507 -0.2815862  0.05257196  0.2150666  0.33405161  0.231073
## OEP01_N705_S501  0.1305329 -0.30296558 -0.1121428  0.45152923 -0.582003
## OEP01_N709_S501 -0.2005908 -0.46462971 -0.0845307  0.15624995 -0.275188
## OEP01_N702_S505 -2.2273784  3.69341246  2.0219742 -2.37901395 -1.742345
##                        W17        W18       W19       W20         W21
##                  <numeric>  <numeric> <numeric> <numeric>   <numeric>
## OEP01_N706_S501  0.0548535 -0.1185615 -0.170439 -0.177957 -0.02763525
## OEP01_N701_S501 -0.2170747 -0.0613756  0.136067 -0.608353 -0.15161779
## OEP01_N707_S507 -0.1143111  0.7471833  0.145397 -0.977857 -0.27504652
## OEP01_N705_S501  0.1636372  0.0792015  0.163124  0.245431 -0.00828173
## OEP01_N709_S501 -0.8483263  0.6533705 -0.161370 -0.202132  1.50893593
## OEP01_N702_S505 -2.7199021 -2.7202103 -1.917451  1.608239 -2.48233730
##                        W22        W23        W24        W25        W26
##                  <numeric>  <numeric>  <numeric>  <numeric>  <numeric>
## OEP01_N706_S501 -0.1101543 -0.0452483 -0.2466602  0.1410014  0.1925462
## OEP01_N701_S501 -0.3549235 -0.0582717 -0.1050211 -0.0587576 -0.0264406
## OEP01_N707_S507 -0.4399909 -0.2052275  0.0898161  0.0378414  0.0131375
## OEP01_N705_S501 -0.0693465 -0.0480346  0.2798822 -0.0222410 -0.2152044
## OEP01_N709_S501 -0.8484122 -0.3315167 -0.9691109  0.4486845 -1.5099861
## OEP01_N702_S505  1.2620105  0.1750976  0.8911051  0.0748229  1.9686151
##                        W27        W28       W29         W30       W31
##                  <numeric>  <numeric> <numeric>   <numeric> <numeric>
## OEP01_N706_S501 -0.0714981 -0.0792524  0.224363  0.14884034 -0.176882
## OEP01_N701_S501 -0.4952902 -0.0893022  0.230802  0.60100328 -0.204547
## OEP01_N707_S507 -0.1087171 -0.3183907  0.081320 -0.32288254 -0.245785
## OEP01_N705_S501 -0.2601506  0.3927593  0.049813  0.23798699 -0.283526
## OEP01_N709_S501 -0.5750850 -0.4881507 -0.984324  0.94240028 -0.340385
## OEP01_N702_S505  0.7140347 -0.8917797  1.172403  0.00813556  0.766339
##                        W32       W33        W34       W35        W36
##                  <numeric> <numeric>  <numeric> <numeric>  <numeric>
## OEP01_N706_S501  0.1107873 -0.242199 -0.1128412 0.0486555  0.0406623
## OEP01_N701_S501  0.0295410  0.408077 -0.0209646 0.2154537 -0.2847428
## OEP01_N707_S507 -0.0559962  0.132861  0.4553084 0.4884976 -0.2712777
## OEP01_N705_S501 -0.3440416  0.280244 -0.0112784 0.0902062 -0.3242816
## OEP01_N709_S501  0.2662828 -0.345783  0.0678599 0.2062731  0.2002757
## OEP01_N702_S505  0.6875437  1.189379  0.4218973 0.9876865  0.2397630
##                         W37       W38       W39       W40       W41        W42
##                   <numeric> <numeric> <numeric> <numeric> <numeric>  <numeric>
## OEP01_N706_S501 -0.34664087  0.296591 -0.397047  0.660618  0.151844 -0.4104539
## OEP01_N701_S501 -0.31658029  0.112225  0.204178  0.161759  0.110611 -0.2328593
## OEP01_N707_S507  0.05948657  0.236327  0.249922 -0.271585  0.614970  0.2856108
## OEP01_N705_S501 -0.00017546 -0.454404  0.112083 -0.142695  0.267346  0.0835569
## OEP01_N709_S501  0.62394067 -0.676848  1.391421  0.248249 -0.764525 -2.0354537
## OEP01_N702_S505 -0.17287513 -0.640165  0.843831  0.134415 -0.190887  0.0351919
##                        W43        W44        W45        W46       W47       W48
##                  <numeric>  <numeric>  <numeric>  <numeric> <numeric> <numeric>
## OEP01_N706_S501 -0.2994574 -0.1771022  0.0199596 -0.1850723 -0.287603  0.116291
## OEP01_N701_S501  0.0124421 -0.0730977  0.1268364 -0.4021831 -0.303506 -0.225725
## OEP01_N707_S507 -0.2248810 -0.2901847 -0.2951189 -0.2187010 -0.089894 -0.185862
## OEP01_N705_S501 -0.0848938  0.5588053  0.2413573  0.3476736  0.106708 -0.223877
## OEP01_N709_S501 -0.3648261 -0.2004645  0.1244308  0.0406688  1.491178  0.510538
## OEP01_N702_S505 -0.4896780  0.2840110  0.2488074  0.8956302  0.837638  0.574261
##                        W49       W50
##                  <numeric> <numeric>
## OEP01_N706_S501 -0.1379595  0.311405
## OEP01_N701_S501 -0.5307374  0.110388
## OEP01_N707_S507 -0.4666151 -0.163571
## OEP01_N705_S501 -0.0464004 -0.356657
## OEP01_N709_S501 -0.4225694  0.601275
## OEP01_N702_S505 -0.0437345  0.343162
norm = assays(corese)$normalizedValues

## -----------------------------------------------------------------------------
length(unique(colData(corese)$Batch))
## [1] 18
dim(norm)   #1000 genes 747 samples
## [1] 1000  747
rownames(norm)[1:10]
##  [1] "Cbr2"    "Cyp2f2"  "Gstm1"   "Sec14l3" "Cyp2g1"  "Sox11"   "Ebf1"   
##  [8] "Clu"     "Fmo6"    "Ugt2a1"
## -----------------------------------------------------------------------------
respca = dudi.pca(t(norm), nf = 3, scannf = FALSE) # dudi.pca use samples as rows to cluster rows
plotscree(respca, 15)

PCS = respca$li[, 1:3]

## -----------------------------------------------------------------------------
library("RColorBrewer")
publishedClusters = colData(corese)[, "publishedClusters"]
batch = colData(corese)$Batch
col_clus = c(
  "transparent", "#1B9E77", "antiquewhite2", "cyan", 
  "#E7298A", "#A6CEE3", "#666666", "#E6AB02", "#FFED6F", 
  "darkorchid2", "#B3DE69", "#FF7F00", "#A6761D", "#1F78B4")
names(col_clus) = sort(unique(publishedClusters))
## -----------------------------------------------------------------------------
library("rgl")
batch = colData(corese)$Batch
plot3d(PCS,aspect=sqrt(c(84,24,20)),col=col_clus[batch])
plot3d(PCS,aspect=sqrt(c(84,24,20)),
col = col_clus[as.character(publishedClusters)])


## -----------------------------------------------------------------------------
HIV <- data.frame(
  Patient = c('AHX112', 'AHX717', 'AHX543'), 
  Mut1 = c(0, 1, 1),
  Mut2 = c(0, 0, 0),
  Mut3 = c(0, 1, 0),
  '...' = rep(' ', 3))
knitr::kable(HIV, format = 'html')
Patient Mut1 Mut2 Mut3 …
AHX112 0 0 0
AHX717 1 0 1
AHX543 1 0 0
## -----------------------------------------------------------------------------
crossHIV <- data.frame(
  Patient = c('Mut1', 'Mut2', 'Mut3'), 
  Mut1 = c(853, 29, 10),
  Mut2 = c(29, 853, 52),
  Mut3 = c(10, 52, 853),
  '...' = rep(' ', 3))
knitr::kable(crossHIV, format = 'html')
Patient Mut1 Mut2 Mut3 …
Mut1 853 29 10
Mut2 29 853 52
Mut3 10 52 853
## -----------------------------------------------------------------------------
cooc = read.delim2("../data/coccurHIV.txt", header = TRUE, sep = ",")
cooc[1:4, 1:11]
##     X4S X6D X6K X11R X20R X21I X35I X35L X35M X35T X39A
## 4S    0  28   8    0   99    0   22    5   15    3   45
## 6D   26   0   0   34  131    0  108    4   30   13   84
## 6K    7   0   0    6   45    0    5   13   38   35   12
## 11R   0  35   7    0  127   12   60   17   15    6   42
HIVca = dudi.coa(cooc, nf = 4, scannf = FALSE)
fviz_eig(HIVca, geom = "bar", bar_width = 0.6) + ggtitle("")

## -----------------------------------------------------------------------------
library("rgl")
CA1=HIVca$li[,1];CA2=HIVca$li[,2];CA3=HIVca$li[,3]
plot3d(CA1,CA2,CA3,aspect=FALSE,col="purple")
## -----------------------------------------------------------------------------
fviz_ca_row(HIVca,axes = c(1, 2),geom="text", col.row="purple",
  labelsize=3)+ggtitle("") + xlim(-0.55, 1.7) + ylim(-0.53,1.1) +
  theme_bw() +  coord_fixed()

fviz_ca_row(HIVca,axes = c(1, 3), geom="text",col.row="purple",
    labelsize=3)+ggtitle("")+ xlim(-0.55, 1.7)+ylim(-0.5,0.6) +
    theme_bw() + coord_fixed()

## -----------------------------------------------------------------------------
fviz_ca_row(HIVca, axes=c(1, 3), geom="text", col.row="purple", labelsize=3) +
  ggtitle("") + theme_minimal() + coord_fixed()

## -----------------------------------------------------------------------------
HairEyeColor
## , , Sex = Male
## 
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    32   11    10     3
##   Brown    53   50    25    15
##   Red      10   10     7     7
##   Blond     3   30     5     8
## 
## , , Sex = Female
## 
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8
HairColor = HairEyeColor[,,2]
HairColor
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8
chisq.test(HairColor)
## Warning in chisq.test(HairColor): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  HairColor
## X-squared = 106.66, df = 9, p-value < 2.2e-16
## -----------------------------------------------------------------------------
knitr::kable(HairColor, format = 'html')
Brown Blue Hazel Green
Black 36 9 5 2
Brown 66 34 29 14
Red 16 7 7 7
Blond 4 64 5 8
## -----------------------------------------------------------------------------
rowsums = as.matrix(apply(HairColor, 1, sum))
rowsums
##       [,1]
## Black   52
## Brown  143
## Red     37
## Blond   81
colsums = as.matrix(apply(HairColor, 2, sum))
t(colsums)
##      Brown Blue Hazel Green
## [1,]   122  114    46    31
HCexp = rowsums %*%t (colsums) / sum(colsums)
## -----------------------------------------------------------------------------
mosaicplot(HCexp, shade=TRUE, las=1, type="pearson", cex.axis=0.7, main="")

## -----------------------------------------------------------------------------
sum((HairColor  - HCexp)^2/HCexp)
## [1] 106.6637
## -----------------------------------------------------------------------------
round(t(HairColor-HCexp))
##        Hair
## Eye     Black Brown Red Blond
##   Brown    16    10   2   -28
##   Blue    -10   -18  -6    34
##   Hazel    -3     8   2    -7
##   Green    -3     0   3     0
library("vcd")
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:GenomicRanges':
## 
##     tile
## The following object is masked from 'package:IRanges':
## 
##     tile
mosaicplot(HairColor, shade=TRUE, las=1, type="pearson", cex.axis=0.7, main="")

## -----------------------------------------------------------------------------
HC = as.data.frame.matrix(HairColor)
coaHC = dudi.coa(HC,scannf=FALSE,nf=2)
round(coaHC$eig[1:3]/sum(coaHC$eig)*100)
## [1] 89 10  2
fviz_ca_biplot(coaHC, repel=TRUE, col.col="brown", col.row="purple") +
  ggtitle("") + ylim(c(-0.5,0.5))

## -----------------------------------------------------------------------------
library("vegan")
res.ca = vegan::cca(HairColor)
plot(res.ca, scaling=3)

## -----------------------------------------------------------------------------
load("../data/lakes.RData")
dim(lakelike)
## [1] 10 15
lakelike[1:3,1:8]
##      plant1 plant2 plant3 plant4 plant5 plant6 plant7 plant8
## loc1      6      4      0      3      0      0      0      0
## loc2      4      5      5      3      4      2      0      0
## loc3      3      4      7      4      5      2      1      1
reslake=dudi.coa(lakelike,scannf=FALSE,nf=2) # dudi.coa: correspondence analysis among the rows
reslake
## Duality diagramm
## class: coa dudi
## $call: dudi.coa(df = lakelike, scannf = FALSE, nf = 2)
## 
## $nf: 2 axis-components saved
## $rank: 9
## eigen values: 0.369 0.1622 0.05687 0.0222 0.01854 ...
##   vector length mode    content       
## 1 $cw    15     numeric column weights
## 2 $lw    10     numeric row weights   
## 3 $eig   9      numeric eigen values  
## 
##   data.frame nrow ncol content             
## 1 $tab       10   15   modified array      
## 2 $li        10   2    row coordinates     
## 3 $l1        10   2    row normed scores   
## 4 $co        15   2    column coordinates  
## 5 $c1        15   2    column normed scores
## other elements: N
length(reslake$eig)
## [1] 9
round(reslake$eig[1:8]/sum(reslake$eig),2)
## [1] 0.56 0.25 0.09 0.03 0.03 0.02 0.01 0.00
## -----------------------------------------------------------------------------
fviz_ca_row(reslake,repel=TRUE)+ggtitle("")+ylim(c(-0.55,1.7))

fviz_ca_biplot(reslake,repel=TRUE)+ggtitle("")+ylim(c(-0.55,1.7))

## -----------------------------------------------------------------------------
## # Provenance tracking, keep this for the record:
## Nor = read.csv("../data/nbt.3154-S3.csv",row.names=1)
## dim(Nor)
## blom = as.matrix(Nor)
## desc1=unlist(strsplit(rownames(blom),"_"))
## desc=desc1[seq(1,7867,2)]
## gr4sfg=which(substr(rownames(blom),1,5)=="4SFGA")
## gr4sf=which(substr(rownames(blom),1,4)=="4SGA")
## gr1=which(substr(rownames(blom),1,2)=="PS")
## gr2=which(substr(rownames(blom),1,2)=="NP")
## gr3=which(substr(rownames(blom),1,2)=="HF")
## colscells=c("blue","green","orange","red","purple")
## colnb=rep(0,3934)
## colnb[gr1]=1
## colnb[gr2]=2
## colnb[gr3]=3
## colnb[gr4sf]=4
## colnb[gr4sfg]=5
## typesort=rep(0,3934)
## typesort[ nchar(desc) < 5 & substr(rownames(blom), 3, 3) == "A"] = "sortA"
## typesort[ nchar(desc) < 5 & substr(rownames(blom), 3, 3) == "B"] = "sortB"
## typesort[ nchar(desc) >= 5 ] = "sortA"
## ftable(typesort)
## celltypes=as.factor(c("PS","NP","HF","4SG","4SGF-")[colnb])
## cellcol = colscells[colnb]
## colCells = DataFrame(celltypes=celltypes, cellcol=colscells[colnb])
## Moignard= SummarizedExperiment(assays=list(assayCells = blom), rowData=colCells)
## saveRDS(Moignard,file="../data/Moignard.rds")

## -----------------------------------------------------------------------------
Moignard = readRDS("../data/Moignard.rds")
Moignard
## class: SummarizedExperiment 
## dim: 3934 46 
## metadata(0):
## assays(1): assayCells
## rownames(3934): HFA1_001 HFA1_002 ... 4SFGA6_246 4SFGA6_247
## rowData names(2): celltypes cellcol
## colnames(46): Cbfa2t3h Cdh1 ... Tbx3 Ubc
## colData names(0):
cellt = rowData(Moignard)$celltypes
cellt[1:10]
##  [1] HF HF HF HF HF HF HF HF HF HF
## Levels: 4SG 4SGF- HF NP PS
table(cellt) # five populations from between embryonic days E7.0 and E8.25.
## cellt
##   4SG 4SGF-    HF    NP    PS 
##   983   770  1005   552   624
colsn = c("red", "purple", "orange", "green", "blue")
blom = assay(Moignard)
dim(blom) #3934 cells and 46 genes
## [1] 3934   46
blom[1,]
##     Cbfa2t3h         Cdh1         Cdh5        Egfl7       Eif2b1          Erg 
##  -2.59885576 -14.00000000   1.81510127  -0.09616803  -0.10803126  -4.03859313 
##         Ets1         Ets2         Etv2         Etv6         Fli1        FoxH1 
##  -2.83606668   1.13154874  -0.67196310  -1.63685031   3.42368180   0.39495055 
##        FoxO4        Gata1         Gfi1        Gfi1b       HbbbH1         Hhex 
##  -2.37693628  -4.92417205 -14.00000000 -14.00000000  -4.06128704  -2.47586930 
##        HoxB2        HoxB4        HoxD8       Ikaros       Itga2b          Kdr 
## -14.00000000  -4.03842468 -14.00000000  -4.25957146  -4.77724528   1.04701024 
##          Kit         Ldb1         Lmo2         Lyl1        Mecom        Meis1 
##  -3.01044681  -3.48850936  -3.05008882  -0.40674610  -5.12874538  -1.92857888 
##         Mitf       Mrpl19          Myb         Nfe2       Notch1       Pecam1 
##  -7.27555173  -2.09703105 -14.00000000 -14.00000000  -5.00328033  -0.04681895 
##       Polr2a        Procr        Runx1        Sfpi1        Sox17         Sox7 
##  -0.86309075  -2.22256484  -3.47034809  -4.47841126  -3.14228012  -0.41408476 
##         Tal1        Tbx20         Tbx3          Ubc 
##   0.14474502  -8.10209754  -2.47731723   3.06815305
dist2n.euclid = dist(blom)
length(dist2n.euclid)
## [1] 7736211
dim(as.matrix(dist2n.euclid))
## [1] 3934 3934
(3934*3934-3934)/2
## [1] 7736211
dist1n.l1     = dist(blom, "manhattan")

## -----------------------------------------------------------------------------
ce1Mds = cmdscale(dist1n.l1,     k = 20, eig = TRUE)
ce2Mds = cmdscale(dist2n.euclid, k = 20, eig = TRUE)
perc1  = round(100*sum(ce1Mds$eig[1:2])/sum(ce1Mds$eig))
perc2  = round(100*sum(ce2Mds$eig[1:2])/sum(ce2Mds$eig))

## -----------------------------------------------------------------------------
plotscree(ce1Mds, m = 4)

plotscree(ce2Mds, m = 4)

## -----------------------------------------------------------------------------
c1mds = ce1Mds$points[, 1:2] |>
        `colnames<-`(paste0("L1_PCo", 1:2)) |>
        as_tibble()
ggplot(c1mds, aes(x = L1_PCo1, y = L1_PCo2, color = cellt)) +
  geom_point(aes(color = cellt), alpha = 0.6) +
  scale_colour_manual(values = colsn) + guides(color = "none")

c2mds = ce2Mds$points[, 1:2] |>
        `colnames<-`(paste0("L2_PCo", 1:2)) |>
        as_tibble()
ggplot(c2mds, aes(x = L2_PCo1, y = L2_PCo2, color = cellt)) +
  geom_point(aes(color = cellt), alpha = 0.6) +
   scale_colour_manual(values = colsn) + guides(color = "none")

## -----------------------------------------------------------------------------
## Hack from https://stackoverflow.com/questions/12041042/how-to-plot-just-the-legends-in-ggplot2
ggpcolor = ggplot(c1mds,aes(x=L1_PCo1,y=L1_PCo2, color = cellt)) +
  geom_point(aes(color = cellt), alpha = 0.6) +
  scale_colour_manual(values=colsn, name = "cell type")
g_legend = function(a) {
     gt = ggplot_gtable(ggplot_build(a))
     leg = which(sapply(gt$grobs, function(x) x$name) == "guide-box")
     gt$grobs[[leg]]
}
grid.draw(g_legend(ggpcolor))

## -----------------------------------------------------------------------------
library("Rtsne")
restsne = Rtsne(blom, dims = 2, perplexity = 30, verbose = FALSE,
                max_iter = 900)
dftsne = restsne$Y[, 1:2] |>
         `colnames<-`(paste0("axis", 1:2)) |>
         as_tibble()
ggplot(dftsne,aes(x = axis1, y = axis2, color = cellt)) +
  geom_point(aes(color = cellt), alpha = 0.6) +
   scale_color_manual(values = colsn) + guides(color = "none")

restsne3 = Rtsne(blom, dims = 3, perplexity = 30, verbose = FALSE,
                 max_iter = 900)
dftsne3 = restsne3$Y[, 1:3] |>
          `colnames<-`(paste0("axis", 1:3)) |> 
          as_tibble()
ggplot(dftsne3,aes(x = axis3, y = axis2, group = cellt)) +
      geom_point(aes(color = cellt), alpha = 0.6) +
      scale_colour_manual(values = colsn) + guides(color = "none")

## -----------------------------------------------------------------------------
ukraine_tsne = Rtsne(ukraine_dists, is_distance = TRUE, perplexity = 8)
ukraine_tsne_df = tibble(
  PCo1 = ukraine_tsne$Y[, 1],
  PCo2 = ukraine_tsne$Y[, 2],
  labs = attr(ukraine_dists, "Labels")
)
ggplot(ukraine_tsne_df, aes(x = PCo1, y = PCo2, label = labs)) +
  geom_point() + geom_text_repel(col = "#0057b7") + coord_fixed() 

## -----------------------------------------------------------------------------
library("genefilter")
load("../data/microbe.rda")
metab = read.csv("../data/metabolites.csv", row.names = 1) |> as.matrix()
dim(metab) 
## [1] 487  12
## -----------------------------------------------------------------------------
library("phyloseq")
## 
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     distance
## The following object is masked from 'package:Biobase':
## 
##     sampleNames
## The following object is masked from 'package:GenomicRanges':
## 
##     distance
## The following object is masked from 'package:IRanges':
## 
##     distance
metab   = metab[rowSums(metab == 0) <= 3, ]
microbe = prune_taxa(taxa_sums(microbe) > 4, microbe)
microbe = filter_taxa(microbe, filterfun(kOverA(3, 2)), TRUE)
metab   = log(1 + metab, base = 10)
X       = log(1 + as.matrix(otu_table(microbe)), base = 10)

## -----------------------------------------------------------------------------
colnames(metab) = colnames(X)
pca1 = dudi.pca(t(metab), scal = TRUE, scann = FALSE)
pca2 = dudi.pca(t(X), scal = TRUE, scann = FALSE)
rv1 = RV.rtest(pca1$tab, pca2$tab, 999)
rv1
## Monte-Carlo test
## Call: RV.rtest(df1 = pca1$tab, df2 = pca2$tab, nrepet = 999)
## 
## Observation: 0.8400429 
## 
## Based on 999 replicates
## Simulated p-value: 0.001 
## Alternative hypothesis: greater 
## 
##     Std.Obs Expectation    Variance 
## 6.403039478 0.309715288 0.006859875
## -----------------------------------------------------------------------------
library("PMA")
ccaRes = CCA(t(X), t(metab), penaltyx = 0.15, penaltyz = 0.15, 
             typex = "standard", typez = "standard")
## 123456789
ccaRes
## Call: CCA(x = t(X), z = t(metab), typex = "standard", typez = "standard", 
##     penaltyx = 0.15, penaltyz = 0.15)
## 
## 
## Num non-zeros u's:  5 
## Num non-zeros v's:  16 
## Type of x:  standard 
## Type of z:  standard 
## Penalty for x: L1 bound is  0.15 
## Penalty for z: L1 bound is  0.15 
## Cor(Xu,Zv):  0.9904707
## -----------------------------------------------------------------------------
combined = cbind(t(X[ccaRes$u != 0, ]),
                 t(metab[ccaRes$v != 0, ]))
pcaRes = dudi.pca(combined, scannf = FALSE, nf = 3)
# annotation
genotype    = substr(rownames(pcaRes$li), 1, 2)
sampleType  = substr(rownames(pcaRes$l1), 3, 4)
featureType = grepl("\\.", colnames(combined))
featureType = ifelse(featureType, "Metabolite", "OTU")
sampleInfo  = data.frame(pcaRes$li, genotype, diet=sampleType)
featureInfo = data.frame(pcaRes$c1, feature = substr(colnames(combined), 1, 6))

## -----------------------------------------------------------------------------
ggplot() +  geom_point(data = sampleInfo,
  aes(x = Axis1, y = Axis2, col = diet, shape = genotype), size = 3) +
  geom_label_repel(data = featureInfo,
  aes(x = 5.5 * CS1, y = 5.5 * CS2, label = feature, fill = featureType),
      size = 2, segment.size = 0.3,
      label.padding = unit(0.1, "lines"), label.size = 0) +
  geom_point(data = featureInfo,
             aes(x = 5.5 * CS1, y = 5.5 * CS2, fill = featureType),
             size = 1, shape = 23, col = "#383838") +
  scale_color_brewer(palette = "Set2") +
  scale_fill_manual(values = c("#a6d854", "#e78ac3")) +
  guides(fill = guide_legend(override.aes = list(shape = 32, size = 0))) +
  coord_fixed()+
  labs(x = sprintf("Axis1 [%s%% Variance]",
                   100 * round(pcaRes$eig[1] / sum(pcaRes$eig), 2)),
       y = sprintf("Axis2 [%s%% Variance]",
                   100 * round(pcaRes$eig[2] / sum(pcaRes$eig), 2)),
       fill = "Feature Type", col = "Sample Type")
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## -----------------------------------------------------------------------------
ps1=readRDS("../data/ps1.rds")
ps1p=filter_taxa(ps1, function(x) sum(x) > 0, TRUE)
psCCpnA = ordinate(ps1p, "CCA",
                 formula = ps1p ~ ageBin + family_relationship)

## -----------------------------------------------------------------------------
library("dplyr")
tax = data.frame(tax_table(ps1p),stringsAsFactors = FALSE)
tax$seq = rownames(tax)
mainOrders = c("Clostridiales", "Bacteroidales",
               "Lactobacillales", "Coriobacteriales")
tax$Order[!(tax$Order %in% mainOrders)] = "Other"
tax$Order = factor(tax$Order, levels = c(mainOrders, "Other"))
tax$otu_id = seq_len(ncol(otu_table(ps1p)))
scoresCCpnA = vegan::scores(psCCpnA)
sites = data.frame(scoresCCpnA$sites)
sites$SampleID = rownames(sites)
sites = left_join(sites, as(sample_data(ps1p), "data.frame"))
## Joining with `by = join_by(SampleID)`
species = data.frame(scoresCCpnA$species)
species$otu_id = seq_along(colnames(otu_table(ps1p)))
species = left_join(species, tax)
## Joining with `by = join_by(otu_id)`
## -----------------------------------------------------------------------------
evalProp = 100 * psCCpnA$CCA$eig[1:2] / sum(psCCpnA$CA$eig)
ggplot() +
 geom_point(data = sites,aes(x =CCA2, y =CCA1),shape =2,alpha=0.5) +
 geom_point(data = species,aes(x =CCA2,y =CCA1,col = Order),size=1)+
 geom_text_repel(data = dplyr::filter(species, CCA2 < (-2)),
                   aes(x = CCA2, y = CCA1, label = otu_id),
                   size = 2, segment.size = 0.1) +
 facet_grid(. ~ ageBin) +
 guides(col = guide_legend(override.aes = list(size = 2))) +
 labs(x = sprintf("Axis2 [%s%% variance]", round(evalProp[2])),
      y = sprintf("Axis1 [%s%% variance]", round(evalProp[1]))) +
 scale_color_brewer(palette = "Set1") + theme(legend.position="bottom")
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps

## -----------------------------------------------------------------------------
ggplot() +
  geom_point(data = sites,   aes(x = CCA2, y = CCA1), shape = 2, alpha = 0.5) +
  geom_point(data = species, aes(x = CCA2, y = CCA1, col = Order), size = 1) +
  geom_text_repel(data =  dplyr::filter(species, CCA2 < (-2)),
                  aes(x = CCA2, y = CCA1, label = otu_id),
                  size = 2, segment.size = 0.1) +
  facet_grid(. ~ family_relationship) +
  guides(col = guide_legend(override.aes = list(size = 2))) +
  labs(x = sprintf("Axis2 [%s%% variance]", round(evalProp[2])),
       y = sprintf("Axis1 [%s%% variance]", round(evalProp[1]))) +
  scale_color_brewer(palette = "Set1") + theme(legend.position="bottom")
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 8 unlabeled data points (too many overlaps). Consider increasing max.overlaps

## -----------------------------------------------------------------------------
ibd.pres = ifelse(assayIBD[, 1:28] > 8.633, 1, 0)
ibd.pres[1:3,]
##                            20CF 20DF 20MF 20PF CC CD CM CNTR23CF CNTR23DF
## 01010101000000.2104_gPM_GC    0    0    0    0  0  0  0        0        0
## 01010101000000.2141_gPM_GC    0    0    0    0  0  0  0        0        0
## 01010101000000.2147_pPM_GC    0    0    0    0  0  0  0        0        0
##                            CNTR23MF CNTR23PF CNTRCF CNTRDF CNTRMF CNTRPF CP
## 01010101000000.2104_gPM_GC        0        0      0      0      0      0  0
## 01010101000000.2141_gPM_GC        0        0      0      0      0      0  0
## 01010101000000.2147_pPM_GC        0        0      0      0      0      0  0
##                            IBS1_CntrCb IBS1_CntrPb IBS1_IBSCb IBS1_IBSPb IBSC
## 01010101000000.2104_gPM_GC           0           0          0          0    0
## 01010101000000.2141_gPM_GC           0           0          0          0    0
## 01010101000000.2147_pPM_GC           0           0          0          0    0
##                            IBSD IBSM IBSP IC ID IM IP
## 01010101000000.2104_gPM_GC    0    0    0  0  0  0  0
## 01010101000000.2141_gPM_GC    0    0    0  0  0  0  0
## 01010101000000.2147_pPM_GC    0    0    0  0  0  0  0
dim(ibd.pres)
## [1] 8634   28
## -----------------------------------------------------------------------------
IBDca = dudi.coa(ibd.pres, scannf = FALSE, nf = 4)
fviz_eig(IBDca, geom = "bar", bar_width = 0.7) +
    ylab("Percentage of chisquare") + ggtitle("")

fviz(IBDca, element = "col", axes = c(1, 2), geom = "point",
     habillage = day, palette = "Dark2", addEllipses = TRUE, color = day,
     ellipse.type = "convex", alpha = 1, col.row.sup =  "blue",
     select = list(name = NULL, cos2 = NULL, contrib = NULL),
     repel = TRUE)

## -----------------------------------------------------------------------------
d1 <- t(data.frame(
  quiet = c(2770, 2150, 2140, 875, 1220, 821, 2510),
  angry = c(2970, 1530, 1740, 752, 1040, 710, 1730),
  clever = c(1650, 1270, 1320, 495, 693, 416, 1420),
  depressed = c(1480, 957, 983, 147, 330, 102, 1270),
  happy = c(19300, 8310, 8730, 1920, 4220, 2610, 9150),
  lively = c(1840, 1250, 1350, 659, 621, 488, 1480),
  perplexed = c(110,  71,  80,  19,  23,  15, 109),
  virtuous = c(179,  80, 102,  20,  25,  17, 165)))
colnames(d1) <- c('black','blue','green','grey','orange','purple','white')
knitr::kable(d1, format='html')
black blue green grey orange purple white
quiet 2770 2150 2140 875 1220 821 2510
angry 2970 1530 1740 752 1040 710 1730
clever 1650 1270 1320 495 693 416 1420
depressed 1480 957 983 147 330 102 1270
happy 19300 8310 8730 1920 4220 2610 9150
lively 1840 1250 1350 659 621 488 1480
perplexed 110 71 80 19 23 15 109
virtuous 179 80 102 20 25 17 165
## -----------------------------------------------------------------------------
colorsentiment = read.csv("../data/colorsentiment.csv")
colsent = xtabs(colorsentiment[,3] ~ colorsentiment[,2] + colorsentiment[,1])
coldf = data.frame(unclass(colsent))
coldf = round(coldf / 1000)
# xtable::xtable(round(coldf),display=rep("d", 8))
colorfactor = names(coldf)
veganout = vegan::cca(coldf)
colorfactor[c(4,7)] = c("darkgrey", "grey")
ordiplot(veganout, scaling = 3, type = "none", xlim =c(-1.2, 0.75), ylim =c(-0.7, 1))
text(veganout, "sites", pch = 21, col = "red", bg = "yellow", scaling = 3)
text(veganout, "species", pch = 21, col = colorfactor, bg = "black", cex=1.2, scaling = 3)

## -----------------------------------------------------------------------------
platof = read.table("../data/platof.txt", header = TRUE)
platof[1:4, ]
##       Rep Laws Crit Phil Pol Soph Tim
## uuuuu  42   91    5   24  13   26  18
## -uuuu  60  144    3   27  19   33  30
## u-uuu  64   72    3   20  24   31  46
## uu-uu  72   98    2   25  20   24  14
## -----------------------------------------------------------------------------
resPlato = dudi.coa(platof, scannf = FALSE, nf = 2)
fviz_ca_biplot(resPlato, axes=c(2, 1)) + ggtitle("")

fviz_eig(resPlato, geom = "bar", width = 0.6) + ggtitle("")

## -----------------------------------------------------------------------------
names(resPlato)
##  [1] "tab"  "cw"   "lw"   "eig"  "rank" "nf"   "c1"   "li"   "co"   "l1"  
## [11] "call" "N"
sum(resPlato$eig)
## [1] 0.132618
percentageInertia=round(100*cumsum(resPlato$eig)/sum(resPlato$eig))
percentageInertia
## [1]  69  85  92  96  98 100
percentageInertia[2]
## [1] 85
## -----------------------------------------------------------------------------
load("../data/lakes.RData")
lakelike[ 1:3, 1:8]
##      plant1 plant2 plant3 plant4 plant5 plant6 plant7 plant8
## loc1      6      4      0      3      0      0      0      0
## loc2      4      5      5      3      4      2      0      0
## loc3      3      4      7      4      5      2      1      1
lakelikeh[1:3, 1:8]
##      plant1 plant2 plant3 plant4 plant5 plant6 plant7 plant8
## loc1      6      4      0      3      0      0      0      0
## loc2      4      5      5      3      4      2      0      0
## loc3      3      4      7      4      5      2      1      1
e_coa  = dudi.coa(lakelike,  scannf = FALSE, nf = 2)
e_pca  = dudi.pca(lakelike,  scannf = FALSE, nf = 2)
eh_coa = dudi.coa(lakelikeh, scannf = FALSE, nf = 2)
eh_pca = dudi.pca(lakelikeh, scannf = FALSE, nf = 2)

## -----------------------------------------------------------------------------
scatter(e_pca)

scatter(e_coa)

s.label(e_pca$li)

s.label(e_coa$li)

s.label(eh_pca$co)

s.label(eh_pca$li)

s.label(eh_coa$li)

s.label(eh_coa$co)

## -----------------------------------------------------------------------------
moignard_raw = as.matrix(read.csv("../data/nbt.3154-S3-raw.csv", row.names = 1))
dist2r.euclid = dist(moignard_raw)
dist1r.l1     = dist(moignard_raw, "manhattan")
cells1.cmds = cmdscale(dist1r.l1,     k = 20, eig = TRUE)
cells2.cmds = cmdscale(dist2r.euclid, k = 20, eig = TRUE)
sum(cells1.cmds$eig[1:2]) / sum(cells1.cmds$eig)
## [1] 0.776075
sum(cells2.cmds$eig[1:2]) / sum(cells2.cmds$eig)
## [1] 0.6297133
## -----------------------------------------------------------------------------
library("kernlab")
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:BiocGenerics':
## 
##     type
## The following object is masked from 'package:permute':
## 
##     how
## The following object is masked from 'package:ggplot2':
## 
##     alpha
laplacedot1 = laplacedot(sigma = 1/3934)
rbfdot1     = rbfdot(sigma = (1/3934)^2 )
Klaplace_cellsn   = kernelMatrix(laplacedot1, blom)
KGauss_cellsn     = kernelMatrix(rbfdot1, blom)
Klaplace_rawcells = kernelMatrix(laplacedot1, moignard_raw)
KGauss_rawcells   = kernelMatrix(rbfdot1, moignard_raw)

## -----------------------------------------------------------------------------
dist1kr = 1 - Klaplace_rawcells
dist2kr = 1 - KGauss_rawcells
dist1kn = 1 - Klaplace_cellsn
dist2kn = 1 - KGauss_cellsn

cells1.kcmds = cmdscale(dist1kr, k = 20, eig = TRUE) 
cells2.kcmds = cmdscale(dist2kr, k = 20, eig = TRUE) 

percentage = function(x, n = 4) round(100 * sum(x[seq_len(n)]) / sum(x[x>0]))
kperc1 = percentage(cells1.kcmds$eig)
kperc2 = percentage(cells2.kcmds$eig)

cellsn1.kcmds = cmdscale(dist1kn, k = 20, eig = TRUE) 
cellsn2.kcmds = cmdscale(dist2kn, k = 20, eig = TRUE)

## -----------------------------------------------------------------------------
colc = rowData(Moignard)$cellcol
library("scatterplot3d")
scatterplot3d(cellsn2.kcmds$points[, 1:3], color=colc, pch = 20,
   xlab = "Axis k1", ylab = "Axis k2", zlab = "Axis k3", angle=15)

scatterplot3d(cellsn2.kcmds$points[, 1:3], color=colc, pch = 20,
   xlab = "Axis k1", ylab = "Axis k2", zlab = "Axis k3", angle = -70)

## -----------------------------------------------------------------------------
library("rgl")
plot3d(cellsn2.kcmds$points[, 1:3], col = colc, size = 3,
       xlab = "Axis1", ylab = "Axis2", zlab = "Axis3")
plot3d(cellsn2.kcmds$points[, c(1,2,4)], col = colc, size = 3,
       xlab = "Axis1", ylab = "Axis2", zlab = "Axis4")
# Using an L1 distance instead.
plot3d(cellsn1.kcmds$points[, 1:3], col = colc, size = 3,
       xlab = "Axis1", ylab = "Axis2", zlab = "Axis3")
plot3d(cellsn1.kcmds$points[, c(1,2,4)], col = colc, size = 3,
       xlab = "Axis1", ylab = "Axis2", zlab = "Axis4")
## -----------------------------------------------------------------------------
library("LPCM")
## 
## Attaching package: 'LPCM'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     coverage
## The following object is masked from 'package:GenomicRanges':
## 
##     coverage
## The following object is masked from 'package:IRanges':
## 
##     coverage
library("diffusionMap")
dmap1 = diffuse(dist1n.l1, neigen = 10)
## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 7.71 seconds
combs = combn(4, 3)
lpcplots = apply(combs, 2, function(j) lpc(dmap1$X[, j], scale = FALSE))
## -----------------------------------------------------------------------------
library("rgl")
for (i in seq_along(lpcplots))
  plot(lpcplots[[i]], type = "l", lwd = 3,
  xlab = paste("Axis", combs[1, i]),
  ylab = paste("Axis", combs[2, i]),
  zlab = paste("Axis", combs[3, i]))

## -----------------------------------------------------------------------------
outlpce134 = lpc(dmap1$X[,c(1,3,4)], scale=FALSE, h=0.5)
plot3d(dmap1$X[,c(1,3,4)], col=colc, pch=20, 
       xlab="Axis1", ylab="Axis3", zlab="Axis4")
plot3d(outlpce134$LPC, type="l", lwd=7, add=TRUE)

outlpce134 = lpc(dmap1$X[,c(1,3,4)], scale=FALSE, h=0.7)
plot3d(outlpce134$LPC, type="l", lwd=7,
       xlab="Axis1", ylab="Axis3", zlab="Axis4")
plot3d(dmap1$X[,c(1,3,4)], col=colc, 
       xlab="", ylab="", zlab="", add=TRUE)
## -----------------------------------------------------------------------------
library("diffusionMap")
dmap2 = diffuse(dist2n.euclid, neigen = 11)
## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 9.69 seconds
dmap1 = diffuse(dist1n.l1, neigen = 11)
## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 7.6 seconds
plot(dmap2)

## -----------------------------------------------------------------------------
library("scatterplot3d")
scp3d = function(axestop = 1:3, dmapRes = dmap1, color = colc,
           anglea = 20, pch = 20)
scatterplot3d(dmapRes$X[, axestop], color = colc,
    xlab = paste("Axis",axestop[1]), ylab = paste("Axis", axestop[2]),
    zlab = paste("Axis",axestop[3]), pch = pch, angle = anglea)
## -----------------------------------------------------------------------------
# TODO: duplicate of the below as a workaround;
# else white space is rendered between lines
scp3d()

scp3d(anglea=310)

scp3d(anglea=210)

scp3d(anglea=150)

## -----------------------------------------------------------------------------
## scp3d()
## scp3d(anglea=310)
## scp3d(anglea=210)
## scp3d(anglea=150)

## -----------------------------------------------------------------------------
# interactive plot
library("rgl")
plot3d(dmap1$X[,1:3], col=colc, size=3)
plot3d(dmap1$X[,2:4], col=colc, size=3)